13257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel"""Facility to use the Expat parser to load a minidom instance
23257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom a string or file.
33257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
43257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielThis avoids all the overhead of SAX and pulldom to gain performance.
53257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel"""
63257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
73257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# Warning!
83257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#
93257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# This module is tightly bound to the implementation details of the
103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# minidom DOM and can't be used with other DOM implementations.  This
113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# is due, in part, to a lack of appropriate methods in the DOM (there is
123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# no way to create Entity and Notation nodes via the DOM Level 2
133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# interface), and for performance.  The later is the cause of some fairly
143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# cryptic code.
153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#
163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# Performance hacks:
173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#
183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#   -  .character_data_handler() has an extra case in which continuing
193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      data is appended to an existing Text node; this can be a
203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      speedup since pyexpat can break up character data into multiple
213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      callbacks even though we set the buffer_text attribute on the
223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      parser.  This also gives us the advantage that we don't need a
233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      separate normalization pass.
243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#
253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#   -  Determining that a node exists is done using an identity comparison
263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      with None rather than a truth test; this avoids searching for and
273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      calling any methods on the node object if it exists.  (A rather
283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel#      nice speedup is achieved this way as well!)
293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom xml.dom import xmlbuilder, minidom, Node
313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom xml.parsers import expat
333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom xml.dom.minidom import _append_child, _set_attribute_node
343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom xml.dom.NodeFilter import NodeFilter
353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielfrom xml.dom.minicompat import *
373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielTEXT_NODE = Node.TEXT_NODE
393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielCDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielDOCUMENT_NODE = Node.DOCUMENT_NODE
413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielFILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT
433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielFILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT
443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielFILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP
453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielFILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT
463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieltheDOMImplementation = minidom.getDOMImplementation()
483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# Expat typename -> TypeInfo
503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel_typeinfo_map = {
513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "CDATA":    minidom.TypeInfo(None, "cdata"),
523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "ENUM":     minidom.TypeInfo(None, "enumeration"),
533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "ENTITY":   minidom.TypeInfo(None, "entity"),
543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "ENTITIES": minidom.TypeInfo(None, "entities"),
553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "ID":       minidom.TypeInfo(None, "id"),
563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "IDREF":    minidom.TypeInfo(None, "idref"),
573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "IDREFS":   minidom.TypeInfo(None, "idrefs"),
583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "NMTOKEN":  minidom.TypeInfo(None, "nmtoken"),
593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "NMTOKENS": minidom.TypeInfo(None, "nmtokens"),
603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    }
613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass ElementInfo(object):
633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    __slots__ = '_attr_info', '_model', 'tagName'
643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __init__(self, tagName, model=None):
663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.tagName = tagName
673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._attr_info = []
683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._model = model
693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __getstate__(self):
713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return self._attr_info, self._model, self.tagName
723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __setstate__(self, state):
743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._attr_info, self._model, self.tagName = state
753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def getAttributeType(self, aname):
773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        for info in self._attr_info:
783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if info[1] == aname:
793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                t = info[-2]
803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if t[0] == "(":
813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    return _typeinfo_map["ENUM"]
823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                else:
833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    return _typeinfo_map[info[-2]]
843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return minidom._no_type
853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def getAttributeTypeNS(self, namespaceURI, localName):
873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return minidom._no_type
883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def isElementContent(self):
903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._model:
913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            type = self._model[0]
923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return type not in (expat.model.XML_CTYPE_ANY,
933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                expat.model.XML_CTYPE_MIXED)
943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return False
963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def isEmpty(self):
983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._model:
993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return self._model[0] == expat.model.XML_CTYPE_EMPTY
1003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
1013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return False
1023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def isId(self, aname):
1043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        for info in self._attr_info:
1053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if info[1] == aname:
1063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                return info[-2] == "ID"
1073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return False
1083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def isIdNS(self, euri, ename, auri, aname):
1103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # not sure this is meaningful
1113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return self.isId((auri, aname))
1123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef _intern(builder, s):
1143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    return builder._intern_setdefault(s, s)
1153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef _parse_ns_name(builder, name):
1173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    assert ' ' in name
1183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    parts = name.split(' ')
1193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    intern = builder._intern_setdefault
1203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if len(parts) == 3:
1213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        uri, localname, prefix = parts
1223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        prefix = intern(prefix, prefix)
1233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        qname = "%s:%s" % (prefix, localname)
1243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        qname = intern(qname, qname)
1253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        localname = intern(localname, localname)
1263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
1273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        uri, localname = parts
1283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        prefix = EMPTY_PREFIX
1293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        qname = localname = intern(localname, localname)
1303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    return intern(uri, uri), localname, prefix, qname
1313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass ExpatBuilder:
1343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Document builder that uses Expat to build a ParsedXML.DOM document
1353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    instance."""
1363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __init__(self, options=None):
1383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if options is None:
1393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            options = xmlbuilder.Options()
1403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._options = options
1413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._options.filter is not None:
1423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._filter = FilterVisibilityController(self._options.filter)
1433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
1443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._filter = None
1453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # This *really* doesn't do anything in this case, so
1463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # override it with something fast & minimal.
1473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._finish_start_element = id
1483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._parser = None
1493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.reset()
1503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def createParser(self):
1523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Create a new parser object."""
1533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return expat.ParserCreate()
1543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def getParser(self):
1563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Return the parser object, creating a new one if needed."""
1573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if not self._parser:
1583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser = self.createParser()
1593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._intern_setdefault = self._parser.intern.setdefault
1603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.buffer_text = True
1613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.ordered_attributes = True
1623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.specified_attributes = True
1633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.install(self._parser)
1643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return self._parser
1653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def reset(self):
1673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Free all data structures used during DOM construction."""
1683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.document = theDOMImplementation.createDocument(
1693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            EMPTY_NAMESPACE, None, None)
1703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.curNode = self.document
1713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._elem_info = self.document._elem_info
1723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._cdata = False
1733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def install(self, parser):
1753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Install the callbacks needed to build the DOM into the parser."""
1763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # This creates circular references!
1773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
1783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.StartElementHandler = self.first_element_handler
1793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.EndElementHandler = self.end_element_handler
1803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.ProcessingInstructionHandler = self.pi_handler
1813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._options.entities:
1823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.EntityDeclHandler = self.entity_decl_handler
1833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.NotationDeclHandler = self.notation_decl_handler
1843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._options.comments:
1853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.CommentHandler = self.comment_handler
1863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._options.cdata_sections:
1873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.StartCdataSectionHandler = self.start_cdata_section_handler
1883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.EndCdataSectionHandler = self.end_cdata_section_handler
1893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.CharacterDataHandler = self.character_data_handler_cdata
1903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
1913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.CharacterDataHandler = self.character_data_handler
1923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
1933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.XmlDeclHandler = self.xml_decl_handler
1943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.ElementDeclHandler = self.element_decl_handler
1953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.AttlistDeclHandler = self.attlist_decl_handler
1963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
1973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def parseFile(self, file):
1983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Parse a document from a file object, returning the document
1993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node."""
2003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser = self.getParser()
2013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        first_buffer = True
2023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
2033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            while 1:
2043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                buffer = file.read(16*1024)
2053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if not buffer:
2063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    break
2073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                parser.Parse(buffer, 0)
2083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if first_buffer and self.document.documentElement:
2093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    self._setup_subset(buffer)
2103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                first_buffer = False
2113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.Parse("", True)
2123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        except ParseEscape:
2133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            pass
2143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        doc = self.document
2153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.reset()
2163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._parser = None
2173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return doc
2183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def parseString(self, string):
2203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Parse a document from a string, returning the document node."""
2213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser = self.getParser()
2223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
2233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.Parse(string, True)
2243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._setup_subset(string)
2253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        except ParseEscape:
2263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            pass
2273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        doc = self.document
2283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.reset()
2293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._parser = None
2303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return doc
2313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _setup_subset(self, buffer):
2333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Load the internal subset if there might be one."""
2343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self.document.doctype:
2353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            extractor = InternalSubsetExtractor()
2363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            extractor.parseString(buffer)
2373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            subset = extractor.getSubset()
2383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.document.doctype.internalSubset = subset
2393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
2413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                   has_internal_subset):
2423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        doctype = self.document.implementation.createDocumentType(
2433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            doctypeName, publicId, systemId)
2443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        doctype.ownerDocument = self.document
2453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.document, doctype)
2463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.document.doctype = doctype
2473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
2483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.document.doctype = None
2493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            del self.document.childNodes[-1]
2503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            doctype = None
2513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.EntityDeclHandler = None
2523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.NotationDeclHandler = None
2533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if has_internal_subset:
2543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if doctype is not None:
2553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                doctype.entities._seq = []
2563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                doctype.notations._seq = []
2573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.CommentHandler = None
2583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.ProcessingInstructionHandler = None
2593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
2603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def end_doctype_decl_handler(self):
2623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._options.comments:
2633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._parser.CommentHandler = self.comment_handler
2643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._parser.ProcessingInstructionHandler = self.pi_handler
2653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if not (self._elem_info or self._filter):
2663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._finish_end_element = id
2673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def pi_handler(self, target, data):
2693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = self.document.createProcessingInstruction(target, data)
2703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.curNode, node)
2713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
2723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.curNode.removeChild(node)
2733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def character_data_handler_cdata(self, data):
2753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        childNodes = self.curNode.childNodes
2763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._cdata:
2773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if (  self._cdata_continue
2783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                  and childNodes[-1].nodeType == CDATA_SECTION_NODE):
2793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                childNodes[-1].appendData(data)
2803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                return
2813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node = self.document.createCDATASection(data)
2823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._cdata_continue = True
2833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
2843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node = childNodes[-1]
2853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            value = node.data + data
2863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d = node.__dict__
2873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d['data'] = d['nodeValue'] = value
2883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return
2893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
2903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node = minidom.Text()
2913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d = node.__dict__
2923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d['data'] = d['nodeValue'] = data
2933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d['ownerDocument'] = self.document
2943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.curNode, node)
2953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
2963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def character_data_handler(self, data):
2973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        childNodes = self.curNode.childNodes
2983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
2993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node = childNodes[-1]
3003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d = node.__dict__
3013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            d['data'] = d['nodeValue'] = node.data + data
3023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return
3033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = minidom.Text()
3043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        d = node.__dict__
3053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        d['data'] = d['nodeValue'] = node.data + data
3063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        d['ownerDocument'] = self.document
3073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.curNode, node)
3083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def entity_decl_handler(self, entityName, is_parameter_entity, value,
3103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                            base, systemId, publicId, notationName):
3113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if is_parameter_entity:
3123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # we don't care about parameter entities for the DOM
3133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return
3143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if not self._options.entities:
3153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return
3163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = self.document._create_entity(entityName, publicId,
3173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                            systemId, notationName)
3183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if value is not None:
3193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # internal entity
3203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # node *should* be readonly, but we'll cheat
3213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            child = self.document.createTextNode(value)
3223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node.childNodes.append(child)
3233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.document.doctype.entities._seq.append(node)
3243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
3253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            del self.document.doctype.entities._seq[-1]
3263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def notation_decl_handler(self, notationName, base, systemId, publicId):
3283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = self.document._create_notation(notationName, publicId, systemId)
3293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.document.doctype.notations._seq.append(node)
3303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT:
3313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            del self.document.doctype.notations._seq[-1]
3323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def comment_handler(self, data):
3343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = self.document.createComment(data)
3353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.curNode, node)
3363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
3373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.curNode.removeChild(node)
3383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_cdata_section_handler(self):
3403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._cdata = True
3413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._cdata_continue = False
3423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def end_cdata_section_handler(self):
3443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._cdata = False
3453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._cdata_continue = False
3463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def external_entity_ref_handler(self, context, base, systemId, publicId):
3483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return 1
3493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def first_element_handler(self, name, attributes):
3513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter is None and not self._elem_info:
3523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._finish_end_element = id
3533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.getParser().StartElementHandler = self.start_element_handler
3543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.start_element_handler(name, attributes)
3553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_element_handler(self, name, attributes):
3573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = self.document.createElement(name)
3583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.curNode, node)
3593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.curNode = node
3603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if attributes:
3623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            for i in range(0, len(attributes), 2):
3633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
3643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                 None, EMPTY_PREFIX)
3653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                value = attributes[i+1]
3663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d = a.childNodes[0].__dict__
3673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['data'] = d['nodeValue'] = value
3683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d = a.__dict__
3693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['value'] = d['nodeValue'] = value
3703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['ownerDocument'] = self.document
3713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                _set_attribute_node(node, a)
3723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if node is not self.document.documentElement:
3743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._finish_start_element(node)
3753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _finish_start_element(self, node):
3773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter:
3783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # To be general, we'd have to call isSameNode(), but this
3793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # is sufficient for minidom:
3803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if node is self.document.documentElement:
3813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                return
3823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            filt = self._filter.startContainer(node)
3833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if filt == FILTER_REJECT:
3843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                # ignore this node & all descendents
3853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                Rejecter(self)
3863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            elif filt == FILTER_SKIP:
3873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                # ignore this node, but make it's children become
3883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                # children of the parent node
3893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                Skipper(self)
3903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            else:
3913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                return
3923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.curNode = node.parentNode
3933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node.parentNode.removeChild(node)
3943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node.unlink()
3953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
3963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    # If this ever changes, Namespaces.end_element_handler() needs to
3973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    # be changed to match.
3983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    #
3993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def end_element_handler(self, name):
4003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        curNode = self.curNode
4013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.curNode = curNode.parentNode
4023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._finish_end_element(curNode)
4033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _finish_end_element(self, curNode):
4053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        info = self._elem_info.get(curNode.tagName)
4063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if info:
4073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._handle_white_text_nodes(curNode, info)
4083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._filter:
4093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if curNode is self.document.documentElement:
4103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                return
4113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if self._filter.acceptNode(curNode) == FILTER_REJECT:
4123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self.curNode.removeChild(curNode)
4133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                curNode.unlink()
4143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _handle_white_text_nodes(self, node, info):
4163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if (self._options.whitespace_in_element_content
4173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            or not info.isElementContent()):
4183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return
4193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # We have element type information and should remove ignorable
4213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # whitespace; identify for text nodes which contain only
4223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # whitespace.
4233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        L = []
4243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        for child in node.childNodes:
4253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if child.nodeType == TEXT_NODE and not child.data.strip():
4263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                L.append(child)
4273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # Remove ignorable whitespace from the tree.
4293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        for child in L:
4303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            node.removeChild(child)
4313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def element_decl_handler(self, name, model):
4333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        info = self._elem_info.get(name)
4343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if info is None:
4353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._elem_info[name] = ElementInfo(name, model)
4363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
4373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            assert info._model is None
4383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            info._model = model
4393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def attlist_decl_handler(self, elem, name, type, default, required):
4413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        info = self._elem_info.get(elem)
4423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if info is None:
4433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            info = ElementInfo(elem)
4443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._elem_info[elem] = info
4453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        info._attr_info.append(
4463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            [None, name, None, None, default, 0, type, required])
4473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def xml_decl_handler(self, version, encoding, standalone):
4493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.document.version = version
4503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.document.encoding = encoding
4513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # This is still a little ugly, thanks to the pyexpat API. ;-(
4523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if standalone >= 0:
4533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if standalone:
4543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self.document.standalone = True
4553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            else:
4563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self.document.standalone = False
4573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# Don't include FILTER_INTERRUPT, since that's checked separately
4603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# where allowed.
4613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)
4623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass FilterVisibilityController(object):
4643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Wrapper around a DOMBuilderFilter which implements the checks
4653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    to make the whatToShow filter attribute work."""
4663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    __slots__ = 'filter',
4683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __init__(self, filter):
4703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.filter = filter
4713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def startContainer(self, node):
4733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        mask = self._nodetype_mask[node.nodeType]
4743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self.filter.whatToShow & mask:
4753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            val = self.filter.startContainer(node)
4763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if val == FILTER_INTERRUPT:
4773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                raise ParseEscape
4783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if val not in _ALLOWED_FILTER_RETURNS:
4793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                raise ValueError, \
4803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                      "startContainer() returned illegal value: " + repr(val)
4813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return val
4823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
4833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return FILTER_ACCEPT
4843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
4853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def acceptNode(self, node):
4863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        mask = self._nodetype_mask[node.nodeType]
4873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self.filter.whatToShow & mask:
4883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            val = self.filter.acceptNode(node)
4893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if val == FILTER_INTERRUPT:
4903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                raise ParseEscape
4913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if val == FILTER_SKIP:
4923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                # move all child nodes to the parent, and remove this node
4933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                parent = node.parentNode
4943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                for child in node.childNodes[:]:
4953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    parent.appendChild(child)
4963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                # node is handled by the caller
4973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                return FILTER_REJECT
4983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if val not in _ALLOWED_FILTER_RETURNS:
4993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                raise ValueError, \
5003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                      "acceptNode() returned illegal value: " + repr(val)
5013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return val
5023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
5033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return FILTER_ACCEPT
5043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    _nodetype_mask = {
5063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.ELEMENT_NODE:                NodeFilter.SHOW_ELEMENT,
5073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.ATTRIBUTE_NODE:              NodeFilter.SHOW_ATTRIBUTE,
5083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.TEXT_NODE:                   NodeFilter.SHOW_TEXT,
5093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.CDATA_SECTION_NODE:          NodeFilter.SHOW_CDATA_SECTION,
5103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.ENTITY_REFERENCE_NODE:       NodeFilter.SHOW_ENTITY_REFERENCE,
5113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.ENTITY_NODE:                 NodeFilter.SHOW_ENTITY,
5123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
5133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.COMMENT_NODE:                NodeFilter.SHOW_COMMENT,
5143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.DOCUMENT_NODE:               NodeFilter.SHOW_DOCUMENT,
5153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.DOCUMENT_TYPE_NODE:          NodeFilter.SHOW_DOCUMENT_TYPE,
5163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.DOCUMENT_FRAGMENT_NODE:      NodeFilter.SHOW_DOCUMENT_FRAGMENT,
5173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        Node.NOTATION_NODE:               NodeFilter.SHOW_NOTATION,
5183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        }
5193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass FilterCrutch(object):
5223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    __slots__ = '_builder', '_level', '_old_start', '_old_end'
5233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __init__(self, builder):
5253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._level = 0
5263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._builder = builder
5273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser = builder._parser
5283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._old_start = parser.StartElementHandler
5293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._old_end = parser.EndElementHandler
5303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.StartElementHandler = self.start_element_handler
5313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.EndElementHandler = self.end_element_handler
5323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass Rejecter(FilterCrutch):
5343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    __slots__ = ()
5353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __init__(self, builder):
5373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        FilterCrutch.__init__(self, builder)
5383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser = builder._parser
5393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        for name in ("ProcessingInstructionHandler",
5403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                     "CommentHandler",
5413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                     "CharacterDataHandler",
5423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                     "StartCdataSectionHandler",
5433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                     "EndCdataSectionHandler",
5443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                     "ExternalEntityRefHandler",
5453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                     ):
5463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            setattr(parser, name, None)
5473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_element_handler(self, *args):
5493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._level = self._level + 1
5503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def end_element_handler(self, *args):
5523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._level == 0:
5533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # restore the old handlers
5543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser = self._builder._parser
5553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._builder.install(parser)
5563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.StartElementHandler = self._old_start
5573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.EndElementHandler = self._old_end
5583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
5593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._level = self._level - 1
5603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass Skipper(FilterCrutch):
5623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    __slots__ = ()
5633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_element_handler(self, *args):
5653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = self._builder.curNode
5663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._old_start(*args)
5673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._builder.curNode is not node:
5683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._level = self._level + 1
5693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def end_element_handler(self, *args):
5713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._level == 0:
5723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # We're popping back out of the node we're skipping, so we
5733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # shouldn't need to do anything but reset the handlers.
5743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._builder._parser.StartElementHandler = self._old_start
5753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._builder._parser.EndElementHandler = self._old_end
5763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._builder = None
5773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
5783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._level = self._level - 1
5793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._old_end(*args)
5803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# framework document used by the fragment builder.
5833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# Takes a string for the doctype, subset string, and namespace attrs string.
5843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
5863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    "http://xml.python.org/entities/fragment-builder/internal"
5873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
5883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel_FRAGMENT_BUILDER_TEMPLATE = (
5893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    '''\
5903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel<!DOCTYPE wrapper
5913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel  %%s [
5923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel  <!ENTITY fragment-builder-internal
5933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    SYSTEM "%s">
5943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel%%s
5953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel]>
5963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel<wrapper %%s
5973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel>&fragment-builder-internal;</wrapper>'''
5983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
5993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass FragmentBuilder(ExpatBuilder):
6023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Builder which constructs document fragments given XML source
6033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    text and a context node.
6043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    The context node is expected to provide information about the
6063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    namespace declarations which are in scope at the start of the
6073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    fragment.
6083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """
6093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def __init__(self, context, options=None):
6113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if context.nodeType == DOCUMENT_NODE:
6123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.originalDocument = context
6133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.context = context
6143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
6153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.originalDocument = context.ownerDocument
6163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.context = context
6173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        ExpatBuilder.__init__(self, options)
6183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def reset(self):
6203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        ExpatBuilder.reset(self)
6213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.fragment = None
6223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def parseFile(self, file):
6243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Parse a document fragment from a file object, returning the
6253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        fragment node."""
6263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return self.parseString(file.read())
6273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def parseString(self, string):
6293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Parse a document fragment from a string, returning the
6303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        fragment node."""
6313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._source = string
6323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser = self.getParser()
6333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        doctype = self.originalDocument.doctype
6343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        ident = ""
6353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if doctype:
6363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            subset = doctype.internalSubset or self._getDeclarations()
6373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if doctype.publicId:
6383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                ident = ('PUBLIC "%s" "%s"'
6393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                         % (doctype.publicId, doctype.systemId))
6403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            elif doctype.systemId:
6413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                ident = 'SYSTEM "%s"' % doctype.systemId
6423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
6433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            subset = ""
6443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        nsattrs = self._getNSattrs() # get ns decls from node's ancestors
6453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
6463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
6473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.Parse(document, 1)
6483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        except:
6493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.reset()
6503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            raise
6513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        fragment = self.fragment
6523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.reset()
6533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel##         self._parser = None
6543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return fragment
6553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _getDeclarations(self):
6573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Re-create the internal subset from the DocumentType node.
6583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        This is only needed if we don't already have the
6603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        internalSubset as a string.
6613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """
6623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        doctype = self.context.ownerDocument.doctype
6633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        s = ""
6643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if doctype:
6653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            for i in range(doctype.notations.length):
6663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                notation = doctype.notations.item(i)
6673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if s:
6683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = s + "\n  "
6693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                s = "%s<!NOTATION %s" % (s, notation.nodeName)
6703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if notation.publicId:
6713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = '%s PUBLIC "%s"\n             "%s">' \
6723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        % (s, notation.publicId, notation.systemId)
6733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                else:
6743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = '%s SYSTEM "%s">' % (s, notation.systemId)
6753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            for i in range(doctype.entities.length):
6763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                entity = doctype.entities.item(i)
6773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if s:
6783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = s + "\n  "
6793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                s = "%s<!ENTITY %s" % (s, entity.nodeName)
6803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if entity.publicId:
6813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = '%s PUBLIC "%s"\n             "%s"' \
6823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        % (s, entity.publicId, entity.systemId)
6833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                elif entity.systemId:
6843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = '%s SYSTEM "%s"' % (s, entity.systemId)
6853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                else:
6863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = '%s "%s"' % (s, entity.firstChild.data)
6873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if entity.notationName:
6883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    s = "%s NOTATION %s" % (s, entity.notationName)
6893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                s = s + ">"
6903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return s
6913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _getNSattrs(self):
6933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return ""
6943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
6953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def external_entity_ref_handler(self, context, base, systemId, publicId):
6963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
6973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # this entref is the one that we made to put the subtree
6983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # in; all of our given input is parsed in here.
6993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            old_document = self.document
7003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            old_cur_node = self.curNode
7013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser = self._parser.ExternalEntityParserCreate(context)
7023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            # put the real document back, parse into the fragment to return
7033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.document = self.originalDocument
7043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.fragment = self.document.createDocumentFragment()
7053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.curNode = self.fragment
7063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            try:
7073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                parser.Parse(self._source, 1)
7083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            finally:
7093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self.curNode = old_cur_node
7103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self.document = old_document
7113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self._source = None
7123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return -1
7133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
7143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            return ExpatBuilder.external_entity_ref_handler(
7153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self, context, base, systemId, publicId)
7163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass Namespaces:
7193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Mix-in class for builders; adds support for namespaces."""
7203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _initNamespaces(self):
7223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # list of (prefix, uri) ns declarations.  Namespace attrs are
7233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # constructed from this and added to the element's attrs.
7243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._ns_ordered_prefixes = []
7253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def createParser(self):
7273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Create a new namespace-handling parser."""
7283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser = expat.ParserCreate(namespace_separator=" ")
7293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.namespace_prefixes = True
7303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return parser
7313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def install(self, parser):
7333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Insert the namespace-handlers onto the parser."""
7343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        ExpatBuilder.install(self, parser)
7353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._options.namespace_declarations:
7363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.StartNamespaceDeclHandler = (
7373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                self.start_namespace_decl_handler)
7383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_namespace_decl_handler(self, prefix, uri):
7403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Push this namespace declaration on our storage."""
7413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._ns_ordered_prefixes.append((prefix, uri))
7423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_element_handler(self, name, attributes):
7443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if ' ' in name:
7453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            uri, localname, prefix, qname = _parse_ns_name(self, name)
7463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
7473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            uri = EMPTY_NAMESPACE
7483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            qname = name
7493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            localname = None
7503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            prefix = EMPTY_PREFIX
7513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node = minidom.Element(qname, uri, prefix, localname)
7523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        node.ownerDocument = self.document
7533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        _append_child(self.curNode, node)
7543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.curNode = node
7553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if self._ns_ordered_prefixes:
7573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            for prefix, uri in self._ns_ordered_prefixes:
7583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if prefix:
7593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    a = minidom.Attr(_intern(self, 'xmlns:' + prefix),
7603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                     XMLNS_NAMESPACE, prefix, "xmlns")
7613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                else:
7623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
7633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                     "xmlns", EMPTY_PREFIX)
7643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d = a.childNodes[0].__dict__
7653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['data'] = d['nodeValue'] = uri
7663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d = a.__dict__
7673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['value'] = d['nodeValue'] = uri
7683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['ownerDocument'] = self.document
7693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                _set_attribute_node(node, a)
7703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            del self._ns_ordered_prefixes[:]
7713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if attributes:
7733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            _attrs = node._attrs
7743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            _attrsNS = node._attrsNS
7753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            for i in range(0, len(attributes), 2):
7763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                aname = attributes[i]
7773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                value = attributes[i+1]
7783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                if ' ' in aname:
7793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    uri, localname, prefix, qname = _parse_ns_name(self, aname)
7803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    a = minidom.Attr(qname, uri, localname, prefix)
7813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    _attrs[qname] = a
7823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    _attrsNS[(uri, localname)] = a
7833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                else:
7843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    a = minidom.Attr(aname, EMPTY_NAMESPACE,
7853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                     aname, EMPTY_PREFIX)
7863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    _attrs[aname] = a
7873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    _attrsNS[(EMPTY_NAMESPACE, aname)] = a
7883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d = a.childNodes[0].__dict__
7893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['data'] = d['nodeValue'] = value
7903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d = a.__dict__
7913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['ownerDocument'] = self.document
7923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['value'] = d['nodeValue'] = value
7933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                d['ownerElement'] = node
7943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
7953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if __debug__:
7963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # This only adds some asserts to the original
7973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # end_element_handler(), so we only define this when -O is not
7983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # used.  If changing one, be sure to check the other to see if
7993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # it needs to be changed as well.
8003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        #
8013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        def end_element_handler(self, name):
8023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            curNode = self.curNode
8033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if ' ' in name:
8043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                uri, localname, prefix, qname = _parse_ns_name(self, name)
8053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                assert (curNode.namespaceURI == uri
8063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        and curNode.localName == localname
8073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        and curNode.prefix == prefix), \
8083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        "element stack messed up! (namespace)"
8093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            else:
8103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                assert curNode.nodeName == name, \
8113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                       "element stack messed up - bad nodeName"
8123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                assert curNode.namespaceURI == EMPTY_NAMESPACE, \
8133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                       "element stack messed up - bad namespaceURI"
8143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.curNode = curNode.parentNode
8153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self._finish_end_element(curNode)
8163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass ExpatBuilderNS(Namespaces, ExpatBuilder):
8193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Document builder that supports namespaces."""
8203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def reset(self):
8223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        ExpatBuilder.reset(self)
8233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._initNamespaces()
8243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass FragmentBuilderNS(Namespaces, FragmentBuilder):
8273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Fragment builder that supports namespaces."""
8283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def reset(self):
8303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        FragmentBuilder.reset(self)
8313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self._initNamespaces()
8323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def _getNSattrs(self):
8343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Return string of namespace attributes from this element and
8353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        ancestors."""
8363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # XXX This needs to be re-written to walk the ancestors of the
8373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # context to build up the namespace information from
8383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # declarations, elements, and attributes found in context.
8393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # Otherwise we have to store a bunch more data on the DOM
8403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        # (though that *might* be more reliable -- not clear).
8413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        attrs = ""
8423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        context = self.context
8433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        L = []
8443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        while context:
8453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            if hasattr(context, '_ns_prefix_uri'):
8463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                for prefix, uri in context._ns_prefix_uri.items():
8473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    # add every new NS decl from context to L and attrs string
8483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    if prefix in L:
8493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        continue
8503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    L.append(prefix)
8513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    if prefix:
8523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        declname = "xmlns:" + prefix
8533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    else:
8543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        declname = "xmlns"
8553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    if attrs:
8563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        attrs = "%s\n    %s='%s'" % (attrs, declname, uri)
8573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                    else:
8583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                        attrs = " %s='%s'" % (declname, uri)
8593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            context = context.parentNode
8603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return attrs
8613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass ParseEscape(Exception):
8643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Exception raised to short-circuit parsing in InternalSubsetExtractor."""
8653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    pass
8663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass InternalSubsetExtractor(ExpatBuilder):
8683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """XML processor which can rip out the internal document type subset."""
8693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    subset = None
8713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def getSubset(self):
8733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        """Return the internal subset as a string."""
8743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return self.subset
8753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def parseFile(self, file):
8773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
8783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            ExpatBuilder.parseFile(self, file)
8793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        except ParseEscape:
8803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            pass
8813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def parseString(self, string):
8833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
8843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            ExpatBuilder.parseString(self, string)
8853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        except ParseEscape:
8863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            pass
8873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def install(self, parser):
8893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
8903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        parser.StartElementHandler = self.start_element_handler
8913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
8923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_doctype_decl_handler(self, name, publicId, systemId,
8933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel                                   has_internal_subset):
8943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        if has_internal_subset:
8953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser = self.getParser()
8963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            self.subset = []
8973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.DefaultHandler = self.subset.append
8983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
8993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        else:
9003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            raise ParseEscape()
9013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def end_doctype_decl_handler(self):
9033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n')
9043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        self.subset = s
9053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        raise ParseEscape()
9063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    def start_element_handler(self, name, attrs):
9083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        raise ParseEscape()
9093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parse(file, namespaces=True):
9123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Parse a document, returning the resulting Document node.
9133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    'file' may be either a file name or an open file object.
9153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """
9163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if namespaces:
9173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = ExpatBuilderNS()
9183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = ExpatBuilder()
9203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if isinstance(file, StringTypes):
9223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        fp = open(file, 'rb')
9233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
9243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            result = builder.parseFile(fp)
9253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        finally:
9263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            fp.close()
9273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        result = builder.parseFile(file)
9293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    return result
9303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parseString(string, namespaces=True):
9333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Parse a document from a string, returning the resulting
9343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    Document node.
9353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """
9363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if namespaces:
9373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = ExpatBuilderNS()
9383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = ExpatBuilder()
9403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    return builder.parseString(string)
9413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parseFragment(file, context, namespaces=True):
9443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Parse a fragment of a document, given the context from which it
9453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    was originally extracted.  context should be the parent of the
9463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    node(s) which are in the fragment.
9473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    'file' may be either a file name or an open file object.
9493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """
9503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if namespaces:
9513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = FragmentBuilderNS(context)
9523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = FragmentBuilder(context)
9543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if isinstance(file, StringTypes):
9563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        fp = open(file, 'rb')
9573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        try:
9583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            result = builder.parseFile(fp)
9593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        finally:
9603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel            fp.close()
9613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        result = builder.parseFile(file)
9633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    return result
9643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parseFragmentString(string, context, namespaces=True):
9673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Parse a fragment of a document from a string, given the context
9683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    from which it was originally extracted.  context should be the
9693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    parent of the node(s) which are in the fragment.
9703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """
9713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if namespaces:
9723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = FragmentBuilderNS(context)
9733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        builder = FragmentBuilder(context)
9753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    return builder.parseString(string)
9763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel
9783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef makeBuilder(options):
9793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    """Create a builder based on an Options object."""
9803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    if options.namespaces:
9813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return ExpatBuilderNS(options)
9823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel    else:
9833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel        return ExpatBuilder(options)
984