10a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"""Facility to use the Expat parser to load a minidom instance 20a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom a string or file. 30a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 40a8c90248264a8b26970b4473770bcc3df8515fJosh GaoThis avoids all the overhead of SAX and pulldom to gain performance. 50a8c90248264a8b26970b4473770bcc3df8515fJosh Gao""" 60a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 70a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Warning! 80a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 90a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# This module is tightly bound to the implementation details of the 100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# minidom DOM and can't be used with other DOM implementations. This 110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# is due, in part, to a lack of appropriate methods in the DOM (there is 120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# no way to create Entity and Notation nodes via the DOM Level 2 130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# interface), and for performance. The later is the cause of some fairly 140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# cryptic code. 150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Performance hacks: 170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# - .character_data_handler() has an extra case in which continuing 190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# data is appended to an existing Text node; this can be a 200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# speedup since pyexpat can break up character data into multiple 210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# callbacks even though we set the buffer_text attribute on the 220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# parser. 
This also gives us the advantage that we don't need a 230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# separate normalization pass. 240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# - Determining that a node exists is done using an identity comparison 260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# with None rather than a truth test; this avoids searching for and 270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# calling any methods on the node object if it exists. (A rather 280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# nice speedup is achieved this way as well!) 290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 300a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom import xmlbuilder, minidom, Node 310a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE 320a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.parsers import expat 330a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom.minidom import _append_child, _set_attribute_node 340a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom.NodeFilter import NodeFilter 350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 360a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom.minicompat import * 370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 380a8c90248264a8b26970b4473770bcc3df8515fJosh GaoTEXT_NODE = Node.TEXT_NODE 390a8c90248264a8b26970b4473770bcc3df8515fJosh GaoCDATA_SECTION_NODE = Node.CDATA_SECTION_NODE 400a8c90248264a8b26970b4473770bcc3df8515fJosh GaoDOCUMENT_NODE = Node.DOCUMENT_NODE 410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 420a8c90248264a8b26970b4473770bcc3df8515fJosh GaoFILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT 430a8c90248264a8b26970b4473770bcc3df8515fJosh GaoFILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT 440a8c90248264a8b26970b4473770bcc3df8515fJosh GaoFILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP 
450a8c90248264a8b26970b4473770bcc3df8515fJosh GaoFILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT 460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 470a8c90248264a8b26970b4473770bcc3df8515fJosh GaotheDOMImplementation = minidom.getDOMImplementation() 480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Expat typename -> TypeInfo 500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_typeinfo_map = { 510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "CDATA": minidom.TypeInfo(None, "cdata"), 520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "ENUM": minidom.TypeInfo(None, "enumeration"), 530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "ENTITY": minidom.TypeInfo(None, "entity"), 540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "ENTITIES": minidom.TypeInfo(None, "entities"), 550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "ID": minidom.TypeInfo(None, "id"), 560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "IDREF": minidom.TypeInfo(None, "idref"), 570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "IDREFS": minidom.TypeInfo(None, "idrefs"), 580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), 590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "NMTOKENS": minidom.TypeInfo(None, "nmtokens"), 600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao } 610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 620a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ElementInfo(object): 630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao __slots__ = '_attr_info', '_model', 'tagName' 640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, tagName, model=None): 660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tagName = tagName 670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._attr_info = [] 680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._model = model 690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __getstate__(self): 710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._attr_info, self._model, self.tagName 720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __setstate__(self, state): 740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._attr_info, self._model, self.tagName = state 750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getAttributeType(self, aname): 770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for info in self._attr_info: 780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info[1] == aname: 790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t = info[-2] 800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if t[0] == "(": 810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _typeinfo_map["ENUM"] 820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _typeinfo_map[info[-2]] 840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return minidom._no_type 850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getAttributeTypeNS(self, namespaceURI, localName): 870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return minidom._no_type 880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isElementContent(self): 900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._model: 910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = self._model[0] 920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return type not in (expat.model.XML_CTYPE_ANY, 930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao expat.model.XML_CTYPE_MIXED) 940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return False 960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
def isEmpty(self): 980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._model: 990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._model[0] == expat.model.XML_CTYPE_EMPTY 1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return False 1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isId(self, aname): 1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for info in self._attr_info: 1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info[1] == aname: 1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return info[-2] == "ID" 1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return False 1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isIdNS(self, euri, ename, auri, aname): 1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # not sure this is meaningful 1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.isId((auri, aname)) 1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _intern(builder, s): 1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return builder._intern_setdefault(s, s) 1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _parse_ns_name(builder, name): 1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao assert ' ' in name 1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parts = name.split(' ') 1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao intern = builder._intern_setdefault 1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(parts) == 3: 1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao uri, localname, prefix = parts 1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao prefix = intern(prefix, prefix) 1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao qname = "%s:%s" % (prefix, localname) 1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
qname = intern(qname, qname) 1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao localname = intern(localname, localname) 1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao uri, localname = parts 1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao prefix = EMPTY_PREFIX 1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao qname = localname = intern(localname, localname) 1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return intern(uri, uri), localname, prefix, qname 1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ExpatBuilder: 1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Document builder that uses Expat to build a ParsedXML.DOM document 1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao instance.""" 1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, options=None): 1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if options is None: 1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao options = xmlbuilder.Options() 1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._options = options 1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._options.filter is not None: 1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._filter = FilterVisibilityController(self._options.filter) 1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._filter = None 1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This *really* doesn't do anything in this case, so 1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # override it with something fast & minimal. 
1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._finish_start_element = id 1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser = None 1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.reset() 1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def createParser(self): 1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Create a new parser object.""" 1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return expat.ParserCreate() 1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getParser(self): 1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the parser object, creating a new one if needed.""" 1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._parser: 1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser = self.createParser() 1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._intern_setdefault = self._parser.intern.setdefault 1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.buffer_text = True 1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.ordered_attributes = True 1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.specified_attributes = True 1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.install(self._parser) 1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._parser 1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def reset(self): 1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Free all data structures used during DOM construction.""" 1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document = theDOMImplementation.createDocument( 1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao EMPTY_NAMESPACE, None, None) 1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode = self.document 1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._elem_info = 
self.document._elem_info 1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._cdata = False 1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def install(self, parser): 1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Install the callbacks needed to build the DOM into the parser.""" 1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This creates circular references! 1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler 1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.StartElementHandler = self.first_element_handler 1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.EndElementHandler = self.end_element_handler 1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.ProcessingInstructionHandler = self.pi_handler 1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._options.entities: 1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.EntityDeclHandler = self.entity_decl_handler 1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.NotationDeclHandler = self.notation_decl_handler 1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._options.comments: 1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.CommentHandler = self.comment_handler 1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._options.cdata_sections: 1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.StartCdataSectionHandler = self.start_cdata_section_handler 1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.EndCdataSectionHandler = self.end_cdata_section_handler 1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.CharacterDataHandler = self.character_data_handler_cdata 1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.CharacterDataHandler = self.character_data_handler 1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
parser.ExternalEntityRefHandler = self.external_entity_ref_handler 1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.XmlDeclHandler = self.xml_decl_handler 1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.ElementDeclHandler = self.element_decl_handler 1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.AttlistDeclHandler = self.attlist_decl_handler 1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def parseFile(self, file): 1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Parse a document from a file object, returning the document 1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node.""" 2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser = self.getParser() 2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao first_buffer = True 2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while 1: 2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buffer = file.read(16*1024) 2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not buffer: 2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.Parse(buffer, 0) 2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if first_buffer and self.document.documentElement: 2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._setup_subset(buffer) 2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao first_buffer = False 2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.Parse("", True) 2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ParseEscape: 2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doc = self.document 2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.reset() 2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser = None 2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return doc 2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def parseString(self, string): 2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Parse a document from a string, returning the document node.""" 2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser = self.getParser() 2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.Parse(string, True) 2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._setup_subset(string) 2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ParseEscape: 2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doc = self.document 2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.reset() 2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser = None 2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return doc 2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _setup_subset(self, buffer): 2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Load the internal subset if there might be one.""" 2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.document.doctype: 2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extractor = InternalSubsetExtractor() 2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extractor.parseString(buffer) 2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao subset = extractor.getSubset() 2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.doctype.internalSubset = subset 2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def start_doctype_decl_handler(self, doctypeName, systemId, publicId, 2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao has_internal_subset): 2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doctype = self.document.implementation.createDocumentType( 2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doctypeName, publicId, systemId) 
2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doctype.ownerDocument = self.document 2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _append_child(self.document, doctype) 2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.doctype = doctype 2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: 2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.doctype = None 2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.document.childNodes[-1] 2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doctype = None 2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.EntityDeclHandler = None 2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.NotationDeclHandler = None 2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if has_internal_subset: 2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if doctype is not None: 2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doctype.entities._seq = [] 2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao doctype.notations._seq = [] 2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.CommentHandler = None 2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.ProcessingInstructionHandler = None 2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler 2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def end_doctype_decl_handler(self): 2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._options.comments: 2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.CommentHandler = self.comment_handler 2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._parser.ProcessingInstructionHandler = self.pi_handler 2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not (self._elem_info or self._filter): 2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
self._finish_end_element = id 2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def pi_handler(self, target, data): 2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = self.document.createProcessingInstruction(target, data) 2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _append_child(self.curNode, node) 2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: 2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode.removeChild(node) 2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def character_data_handler_cdata(self, data): 2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao childNodes = self.curNode.childNodes 2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._cdata: 2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if ( self._cdata_continue 2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao and childNodes[-1].nodeType == CDATA_SECTION_NODE): 2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao childNodes[-1].appendData(data) 2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = self.document.createCDATASection(data) 2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._cdata_continue = True 2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif childNodes and childNodes[-1].nodeType == TEXT_NODE: 2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = childNodes[-1] 2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = node.data + data 2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d = node.__dict__ 2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['data'] = d['nodeValue'] = value 2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = minidom.Text() 
2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d = node.__dict__ 2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['data'] = d['nodeValue'] = data 2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['ownerDocument'] = self.document 2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _append_child(self.curNode, node) 2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def character_data_handler(self, data): 2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao childNodes = self.curNode.childNodes 2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if childNodes and childNodes[-1].nodeType == TEXT_NODE: 2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = childNodes[-1] 3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d = node.__dict__ 3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['data'] = d['nodeValue'] = node.data + data 3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = minidom.Text() 3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d = node.__dict__ 3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['data'] = d['nodeValue'] = node.data + data 3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['ownerDocument'] = self.document 3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _append_child(self.curNode, node) 3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def entity_decl_handler(self, entityName, is_parameter_entity, value, 3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao base, systemId, publicId, notationName): 3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if is_parameter_entity: 3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # we don't care about parameter entities for the DOM 3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._options.entities: 3150a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao return 3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = self.document._create_entity(entityName, publicId, 3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao systemId, notationName) 3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if value is not None: 3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # internal entity 3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # node *should* be readonly, but we'll cheat 3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao child = self.document.createTextNode(value) 3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node.childNodes.append(child) 3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.doctype.entities._seq.append(node) 3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: 3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.document.doctype.entities._seq[-1] 3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def notation_decl_handler(self, notationName, base, systemId, publicId): 3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = self.document._create_notation(notationName, publicId, systemId) 3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.doctype.notations._seq.append(node) 3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: 3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.document.doctype.notations._seq[-1] 3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def comment_handler(self, data): 3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = self.document.createComment(data) 3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _append_child(self.curNode, node) 3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: 
3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode.removeChild(node) 3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def start_cdata_section_handler(self): 3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._cdata = True 3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._cdata_continue = False 3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def end_cdata_section_handler(self): 3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._cdata = False 3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._cdata_continue = False 3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def external_entity_ref_handler(self, context, base, systemId, publicId): 3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 1 3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def first_element_handler(self, name, attributes): 3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter is None and not self._elem_info: 3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._finish_end_element = id 3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.getParser().StartElementHandler = self.start_element_handler 3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.start_element_handler(name, attributes) 3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def start_element_handler(self, name, attributes): 3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node = self.document.createElement(name) 3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _append_child(self.curNode, node) 3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode = node 3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if attributes: 
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for i in range(0, len(attributes), 2): 3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, 3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao None, EMPTY_PREFIX) 3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = attributes[i+1] 3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d = a.childNodes[0].__dict__ 3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['data'] = d['nodeValue'] = value 3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d = a.__dict__ 3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['value'] = d['nodeValue'] = value 3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao d['ownerDocument'] = self.document 3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _set_attribute_node(node, a) 3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if node is not self.document.documentElement: 3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._finish_start_element(node) 3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _finish_start_element(self, node): 3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter: 3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # To be general, we'd have to call isSameNode(), but this 3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # is sufficient for minidom: 3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if node is self.document.documentElement: 3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filt = self._filter.startContainer(node) 3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if filt == FILTER_REJECT: 3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ignore this node & all descendents 3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Rejecter(self) 3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif filt == 
FILTER_SKIP: 3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ignore this node, but make it's children become 3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # children of the parent node 3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Skipper(self) 3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode = node.parentNode 3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node.parentNode.removeChild(node) 3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node.unlink() 3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If this ever changes, Namespaces.end_element_handler() needs to 3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # be changed to match. 3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def end_element_handler(self, name): 4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao curNode = self.curNode 4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode = curNode.parentNode 4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._finish_end_element(curNode) 4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _finish_end_element(self, curNode): 4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = self._elem_info.get(curNode.tagName) 4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info: 4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._handle_white_text_nodes(curNode, info) 4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter: 4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if curNode is self.document.documentElement: 4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filter.acceptNode(curNode) == FILTER_REJECT: 
4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.curNode.removeChild(curNode) 4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao curNode.unlink() 4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _handle_white_text_nodes(self, node, info): 4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if (self._options.whitespace_in_element_content 4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao or not info.isElementContent()): 4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # We have element type information and should remove ignorable 4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # whitespace; identify for text nodes which contain only 4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # whitespace. 4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao L = [] 4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for child in node.childNodes: 4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if child.nodeType == TEXT_NODE and not child.data.strip(): 4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao L.append(child) 4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Remove ignorable whitespace from the tree. 
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for child in L: 4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node.removeChild(child) 4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def element_decl_handler(self, name, model): 4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = self._elem_info.get(name) 4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info is None: 4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._elem_info[name] = ElementInfo(name, model) 4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao assert info._model is None 4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info._model = model 4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def attlist_decl_handler(self, elem, name, type, default, required): 4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = self._elem_info.get(elem) 4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info is None: 4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = ElementInfo(elem) 4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._elem_info[elem] = info 4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info._attr_info.append( 4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [None, name, None, None, default, 0, type, required]) 4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def xml_decl_handler(self, version, encoding, standalone): 4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.version = version 4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.document.encoding = encoding 4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This is still a little ugly, thanks to the pyexpat API. 
# Don't include FILTER_INTERRUPT, since that's checked separately
# where allowed.
_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)


class FilterVisibilityController(object):
    """Wrapper around a DOMBuilderFilter which implements the checks
    to make the whatToShow filter attribute work.

    The wrapped filter is only consulted for node types selected by its
    ``whatToShow`` bit mask; every other node is accepted unconditionally.
    """

    __slots__ = 'filter',

    def __init__(self, filter):
        self.filter = filter

    def startContainer(self, node):
        """Return the filter's verdict for a container that is being opened.

        Raises ParseEscape when the filter answers FILTER_INTERRUPT and
        ValueError when it answers anything other than accept/reject/skip.
        """
        mask = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & mask):
            # This node type is hidden from the filter.
            return FILTER_ACCEPT
        val = self.filter.startContainer(node)
        if val == FILTER_INTERRUPT:
            raise ParseEscape
        if val not in _ALLOWED_FILTER_RETURNS:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError(
                "startContainer() returned illegal value: " + repr(val))
        return val

    def acceptNode(self, node):
        """Return the filter's verdict for a completed node.

        A FILTER_SKIP answer is handled here: the node's children are
        appended to its parent and FILTER_REJECT is returned so that the
        caller removes the (now empty) node itself.

        Raises ParseEscape when the filter answers FILTER_INTERRUPT and
        ValueError when it answers an unknown value.
        """
        mask = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & mask):
            # This node type is hidden from the filter.
            return FILTER_ACCEPT
        val = self.filter.acceptNode(node)
        if val == FILTER_INTERRUPT:
            raise ParseEscape
        if val == FILTER_SKIP:
            # move all child nodes to the parent, and remove this node
            parent = node.parentNode
            # Iterate over a copy: appendChild() mutates node.childNodes.
            for child in node.childNodes[:]:
                parent.appendChild(child)
            # node is handled by the caller
            return FILTER_REJECT
        if val not in _ALLOWED_FILTER_RETURNS:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError(
                "acceptNode() returned illegal value: " + repr(val))
        return val

    # Maps a DOM nodeType to the whatToShow bit that selects it.
    _nodetype_mask = {
        Node.ELEMENT_NODE:                NodeFilter.SHOW_ELEMENT,
        Node.ATTRIBUTE_NODE:              NodeFilter.SHOW_ATTRIBUTE,
        Node.TEXT_NODE:                   NodeFilter.SHOW_TEXT,
        Node.CDATA_SECTION_NODE:          NodeFilter.SHOW_CDATA_SECTION,
        Node.ENTITY_REFERENCE_NODE:       NodeFilter.SHOW_ENTITY_REFERENCE,
        Node.ENTITY_NODE:                 NodeFilter.SHOW_ENTITY,
        Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
        Node.COMMENT_NODE:                NodeFilter.SHOW_COMMENT,
        Node.DOCUMENT_NODE:               NodeFilter.SHOW_DOCUMENT,
        Node.DOCUMENT_TYPE_NODE:          NodeFilter.SHOW_DOCUMENT_TYPE,
        Node.DOCUMENT_FRAGMENT_NODE:      NodeFilter.SHOW_DOCUMENT_FRAGMENT,
        Node.NOTATION_NODE:               NodeFilter.SHOW_NOTATION,
        }


class FilterCrutch(object):
    """Base for objects that temporarily take over a builder's parser.

    On construction the parser's current start/end element handlers are
    saved and replaced by this object's ``start_element_handler`` and
    ``end_element_handler``, which subclasses must provide.
    """

    __slots__ = '_builder', '_level', '_old_start', '_old_end'

    def __init__(self, builder):
        # Nesting depth below the element that triggered the takeover.
        self._level = 0
        self._builder = builder
        parser = builder._parser
        self._old_start = parser.StartElementHandler
        self._old_end = parser.EndElementHandler
        parser.StartElementHandler = self.start_element_handler
        parser.EndElementHandler = self.end_element_handler


class Rejecter(FilterCrutch):
    """Discard an element and everything nested inside it."""

    __slots__ = ()

    def __init__(self, builder):
        FilterCrutch.__init__(self, builder)
        parser = builder._parser
        # Silence every content callback while inside the rejected element.
        for name in ("ProcessingInstructionHandler",
                     "CommentHandler",
                     "CharacterDataHandler",
                     "StartCdataSectionHandler",
                     "EndCdataSectionHandler",
                     "ExternalEntityRefHandler",
                     ):
            setattr(parser, name, None)

    def start_element_handler(self, *args):
        # Only track depth; nested elements are dropped.
        self._level += 1

    def end_element_handler(self, *args):
        if self._level == 0:
            # restore the old handlers
            parser = self._builder._parser
            # install() re-attaches the handlers cleared in __init__ ...
            self._builder.install(parser)
            # ... then put back the start/end handlers that were active
            # when this Rejecter took over.
            parser.StartElementHandler = self._old_start
            parser.EndElementHandler = self._old_end
        else:
            self._level -= 1


class Skipper(FilterCrutch):
    """Drop a single element while letting its content be built."""

    __slots__ = ()

    def start_element_handler(self, *args):
        node = self._builder.curNode
        self._old_start(*args)
        # Count the element only if the original handler made it current.
        if self._builder.curNode is not node:
            self._level += 1

    def end_element_handler(self, *args):
        if self._level == 0:
            # We're popping back out of the node we're skipping, so we
            # shouldn't need to do anything but reset the handlers.
            self._builder._parser.StartElementHandler = self._old_start
            self._builder._parser.EndElementHandler = self._old_end
            self._builder = None
        else:
            self._level -= 1
            self._old_end(*args)


# framework document used by the fragment builder.
# Takes a string for the doctype, subset string, and namespace attrs string.

_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
    "http://xml.python.org/entities/fragment-builder/internal"

# The "%s" below is filled in immediately with the internal system id; the
# three "%%s" slots survive as "%s" for the doctype identifier, the internal
# subset and the namespace attributes respectively.
_FRAGMENT_BUILDER_TEMPLATE = (
    '''\
<!DOCTYPE wrapper
  %%s [
  <!ENTITY fragment-builder-internal
    SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
    % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
class FragmentBuilder(ExpatBuilder):
    """Builder which constructs document fragments given XML source
    text and a context node.

    The context node is expected to provide information about the
    namespace declarations which are in scope at the start of the
    fragment.
    """

    def __init__(self, context, options=None):
        # A Document is its own "original document"; any other node
        # contributes the document that owns it.
        if context.nodeType == DOCUMENT_NODE:
            self.originalDocument = context
        else:
            self.originalDocument = context.ownerDocument
        self.context = context
        ExpatBuilder.__init__(self, options)

    def reset(self):
        ExpatBuilder.reset(self)
        self.fragment = None

    def parseFile(self, file):
        """Parse a document fragment from a file object, returning the
        fragment node."""
        return self.parseString(file.read())

    def parseString(self, string):
        """Parse a document fragment from a string, returning the
        fragment node."""
        self._source = string
        parser = self.getParser()
        doctype = self.originalDocument.doctype
        ident = ""
        if doctype:
            subset = doctype.internalSubset or self._getDeclarations()
            if doctype.publicId:
                ident = ('PUBLIC "%s" "%s"'
                         % (doctype.publicId, doctype.systemId))
            elif doctype.systemId:
                ident = 'SYSTEM "%s"' % doctype.systemId
        else:
            subset = ""
        nsattrs = self._getNSattrs() # get ns decls from node's ancestors
        # The caller's text is not embedded here; the wrapper document pulls
        # it in through an entity that external_entity_ref_handler() serves.
        document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
        try:
            parser.Parse(document, 1)
            fragment = self.fragment
        finally:
            # Reset on success and on failure alike; an exception raised
            # by Parse() still propagates to the caller.
            self.reset()
        return fragment

    def _getDeclarations(self):
        """Re-create the internal subset from the DocumentType node.

        This is only needed if we don't already have the
        internalSubset as a string.
        """
        # Use originalDocument rather than context.ownerDocument: when the
        # context *is* the document, its ownerDocument is None.
        doctype = self.originalDocument.doctype
        s = ""
        if doctype:
            for i in range(doctype.notations.length):
                notation = doctype.notations.item(i)
                if s:
                    s = s + "\n "
                s = "%s<!NOTATION %s" % (s, notation.nodeName)
                if notation.publicId:
                    s = '%s PUBLIC "%s"\n "%s">' \
                        % (s, notation.publicId, notation.systemId)
                else:
                    s = '%s SYSTEM "%s">' % (s, notation.systemId)
            for i in range(doctype.entities.length):
                entity = doctype.entities.item(i)
                if s:
                    s = s + "\n "
                s = "%s<!ENTITY %s" % (s, entity.nodeName)
                if entity.publicId:
                    s = '%s PUBLIC "%s"\n "%s"' \
                        % (s, entity.publicId, entity.systemId)
                elif entity.systemId:
                    s = '%s SYSTEM "%s"' % (s, entity.systemId)
                else:
                    # Internal entity: its replacement text is read from
                    # the entity node's first child.
                    s = '%s "%s"' % (s, entity.firstChild.data)
                if entity.notationName:
                    s = "%s NOTATION %s" % (s, entity.notationName)
                s = s + ">"
        return s

    def _getNSattrs(self):
        # No namespace support in the plain builder.
        return ""

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
            # this entref is the one that we made to put the subtree
            # in; all of our given input is parsed in here.
            old_document = self.document
            old_cur_node = self.curNode
            parser = self._parser.ExternalEntityParserCreate(context)
            # put the real document back, parse into the fragment to return
            self.document = self.originalDocument
            self.fragment = self.document.createDocumentFragment()
            self.curNode = self.fragment
            try:
                parser.Parse(self._source, 1)
            finally:
                self.curNode = old_cur_node
                self.document = old_document
                self._source = None
            return -1
        else:
            return ExpatBuilder.external_entity_ref_handler(
                self, context, base, systemId, publicId)


class Namespaces:
    """Mix-in class for builders; adds support for namespaces."""

    def _initNamespaces(self):
        # list of (prefix, uri) ns declarations.  Namespace attrs are
        # constructed from this and added to the element's attrs.
        self._ns_ordered_prefixes = []

    def createParser(self):
        """Create a new namespace-handling parser."""
        parser = expat.ParserCreate(namespace_separator=" ")
        parser.namespace_prefixes = True
        return parser

    def install(self, parser):
        """Insert the namespace-handlers onto the parser."""
        ExpatBuilder.install(self, parser)
        if self._options.namespace_declarations:
            parser.StartNamespaceDeclHandler = (
                self.start_namespace_decl_handler)

    def start_namespace_decl_handler(self, prefix, uri):
        """Push this namespace declaration on our storage."""
        self._ns_ordered_prefixes.append((prefix, uri))

    def start_element_handler(self, name, attributes):
        # Names that carry namespace information contain the separator
        # configured in createParser(); _parse_ns_name() splits them.
        if ' ' in name:
            uri, localname, prefix, qname = _parse_ns_name(self, name)
        else:
            uri = EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = EMPTY_PREFIX
        node = minidom.Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        _append_child(self.curNode, node)
        self.curNode = node

        # Turn the declarations collected since the previous element into
        # xmlns attributes on this element.
        if self._ns_ordered_prefixes:
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(_intern(self, 'xmlns:' + prefix),
                                     XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
                                     "xmlns", EMPTY_PREFIX)
                # The values are written straight into the instance
                # dictionaries of the attribute and its text child.
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = uri
                d = a.__dict__
                d['value'] = d['nodeValue'] = uri
                d['ownerDocument'] = self.document
                _set_attribute_node(node, a)
            del self._ns_ordered_prefixes[:]

        if attributes:
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # attributes is a flat list: name, value, name, value, ...
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = _parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, EMPTY_NAMESPACE,
                                     aname, EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(EMPTY_NAMESPACE, aname)] = a
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = value
                d = a.__dict__
                d['ownerDocument'] = self.document
                d['value'] = d['nodeValue'] = value
                d['ownerElement'] = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        #
        def end_element_handler(self, name):
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = _parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)


class ExpatBuilderNS(Namespaces, ExpatBuilder):
    """Document builder that supports namespaces."""

    def reset(self):
        ExpatBuilder.reset(self)
        self._initNamespaces()


class FragmentBuilderNS(Namespaces, FragmentBuilder):
    """Fragment builder that supports namespaces."""

    def reset(self):
        FragmentBuilder.reset(self)
        self._initNamespaces()

    def _getNSattrs(self):
        """Return string of namespace attributes from this element and
        ancestors."""
        # XXX This needs to be re-written to walk the ancestors of the
        # context to build up the namespace information from
        # declarations, elements, and attributes found in context.
        # Otherwise we have to store a bunch more data on the DOM
        # (though that *might* be more reliable -- not clear).
        attrs = ""
        context = self.context
        # Prefixes already emitted; the declaration nearest to the context
        # node wins because the walk goes from the context outwards.
        seen = set()
        while context:
            if hasattr(context, '_ns_prefix_uri'):
                for prefix, uri in context._ns_prefix_uri.items():
                    # add every new NS decl from context to seen and
                    # to the attrs string
                    if prefix in seen:
                        continue
                    seen.add(prefix)
                    if prefix:
                        declname = "xmlns:" + prefix
                    else:
                        declname = "xmlns"
                    if attrs:
                        attrs = "%s\n %s='%s'" % (attrs, declname, uri)
                    else:
                        attrs = " %s='%s'" % (declname, uri)
            context = context.parentNode
        return attrs


class ParseEscape(Exception):
    """Exception raised to short-circuit parsing in InternalSubsetExtractor."""
    pass

class InternalSubsetExtractor(ExpatBuilder):
    """XML processor which can rip out the internal document type subset."""

    subset = None

    def getSubset(self):
        """Return the internal subset as a string."""
        return self.subset

    def parseFile(self, file):
        # ParseEscape is how parsing is cut short on purpose; swallow it.
        try:
            ExpatBuilder.parseFile(self, file)
        except ParseEscape:
            pass

    def parseString(self, string):
        # ParseEscape is how parsing is cut short on purpose; swallow it.
        try:
            ExpatBuilder.parseString(self, string)
        except ParseEscape:
            pass

8880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def install(self, parser): 8890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler 8900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.StartElementHandler = self.start_element_handler 8910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def start_doctype_decl_handler(self, name, publicId, systemId, 8930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao has_internal_subset): 8940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if has_internal_subset: 8950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser = self.getParser() 8960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.subset = [] 8970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.DefaultHandler = self.subset.append 8980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler 8990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ParseEscape() 9010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def end_doctype_decl_handler(self): 9030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') 9040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.subset = s 9050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ParseEscape() 9060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def start_element_handler(self, name, attrs): 9080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ParseEscape() 9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9110a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef parse(file, namespaces=True): 9120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Parse a document, returning the resulting Document 
node. 9130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'file' may be either a file name or an open file object. 9150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if namespaces: 9170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = ExpatBuilderNS() 9180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = ExpatBuilder() 9200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(file, StringTypes): 9220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = open(file, 'rb') 9230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 9240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = builder.parseFile(fp) 9250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 9260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 9270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = builder.parseFile(file) 9290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return result 9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9320a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef parseString(string, namespaces=True): 9330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Parse a document from a string, returning the resulting 9340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Document node. 
9350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if namespaces: 9370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = ExpatBuilderNS() 9380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = ExpatBuilder() 9400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return builder.parseString(string) 9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9430a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef parseFragment(file, context, namespaces=True): 9440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Parse a fragment of a document, given the context from which it 9450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao was originally extracted. context should be the parent of the 9460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao node(s) which are in the fragment. 9470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'file' may be either a file name or an open file object. 
9490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if namespaces: 9510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = FragmentBuilderNS(context) 9520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = FragmentBuilder(context) 9540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(file, StringTypes): 9560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = open(file, 'rb') 9570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 9580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = builder.parseFile(fp) 9590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 9600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 9610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = builder.parseFile(file) 9630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return result 9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9660a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef parseFragmentString(string, context, namespaces=True): 9670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Parse a fragment of a document from a string, given the context 9680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from which it was originally extracted. context should be the 9690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parent of the node(s) which are in the fragment. 
9700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if namespaces: 9720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = FragmentBuilderNS(context) 9730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao builder = FragmentBuilder(context) 9750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return builder.parseString(string) 9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9780a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef makeBuilder(options): 9790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Create a builder based on an Options object.""" 9800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if options.namespaces: 9810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return ExpatBuilderNS(options) 9820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return ExpatBuilder(options) 984