"""Facility to use the Expat parser to load a minidom instance
from a string or file.

This avoids all the overhead of SAX and pulldom to gain performance.
"""
60a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Warning!
#
# This module is tightly bound to the implementation details of the
# minidom DOM and can't be used with other DOM implementations.  This
# is due, in part, to a lack of appropriate methods in the DOM (there is
# no way to create Entity and Notation nodes via the DOM Level 2
# interface), and for performance.  The latter is the cause of some fairly
# cryptic code.
#
# Performance hacks:
#
#   -  .character_data_handler() has an extra case in which continuing
#      data is appended to an existing Text node; this can be a
#      speedup since pyexpat can break up character data into multiple
#      callbacks even though we set the buffer_text attribute on the
#      parser.  This also gives us the advantage that we don't need a
#      separate normalization pass.
#
#   -  Determining that a node exists is done using an identity comparison
#      with None rather than a truth test; this avoids searching for and
#      calling any methods on the node object if it exists.  (A rather
#      nice speedup is achieved this way as well!)
290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
300a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom import xmlbuilder, minidom, Node
310a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
320a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.parsers import expat
330a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom.minidom import _append_child, _set_attribute_node
340a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom.NodeFilter import NodeFilter
350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
360a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom xml.dom.minicompat import *
370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Node-type codes hoisted to module level so the handlers can compare
# against plain globals.
TEXT_NODE, CDATA_SECTION_NODE, DOCUMENT_NODE = (
    Node.TEXT_NODE,
    Node.CDATA_SECTION_NODE,
    Node.DOCUMENT_NODE,
)

# Filter verdict codes, taken from xmlbuilder.DOMBuilderFilter.
FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP, FILTER_INTERRUPT = (
    xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT,
    xmlbuilder.DOMBuilderFilter.FILTER_REJECT,
    xmlbuilder.DOMBuilderFilter.FILTER_SKIP,
    xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT,
)

# The minidom DOM implementation used to create every document built here.
theDOMImplementation = minidom.getDOMImplementation()
480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Expat typename -> TypeInfo
# Each TypeInfo has a None namespace and the lower-case DOM type name.
_typeinfo_map = {
    expat_name: minidom.TypeInfo(None, dom_name)
    for expat_name, dom_name in (
        ("CDATA", "cdata"),
        ("ENUM", "enumeration"),
        ("ENTITY", "entity"),
        ("ENTITIES", "entities"),
        ("ID", "id"),
        ("IDREF", "idref"),
        ("IDREFS", "idrefs"),
        ("NMTOKEN", "nmtoken"),
        ("NMTOKENS", "nmtokens"),
    )
}
610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class ElementInfo(object):
    """Declaration data recorded for one element type.

    Holds the element's tag name, its content model (``_model``, whose
    first item is compared against the ``expat.model.XML_CTYPE_*`` codes)
    and a list of attribute declarations (``_attr_info``).  Each attribute
    entry is read positionally: index 1 is the attribute name and index -2
    is the declared type string.
    """

    __slots__ = '_attr_info', '_model', 'tagName'

    def __init__(self, tagName, model=None):
        """Start with no attribute declarations and the given model."""
        self._model = model
        self._attr_info = []
        self.tagName = tagName

    def __getstate__(self):
        """Return the three slot values as a tuple (for pickling)."""
        return (self._attr_info, self._model, self.tagName)

    def __setstate__(self, state):
        """Restore the slots from a tuple produced by __getstate__()."""
        attr_info, model, tag_name = state
        self._attr_info = attr_info
        self._model = model
        self.tagName = tag_name

    def getAttributeType(self, aname):
        """Return the TypeInfo for the attribute named *aname*.

        A declared type starting with "(" maps to the "ENUM" entry; any
        other declared type is looked up directly in _typeinfo_map.  When
        no declaration matches, minidom._no_type is returned.
        """
        for entry in self._attr_info:
            if entry[1] != aname:
                continue
            declared = entry[-2]
            key = "ENUM" if declared[0] == "(" else declared
            return _typeinfo_map[key]
        return minidom._no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Always return minidom._no_type, whatever the arguments."""
        return minidom._no_type

    def isElementContent(self):
        """Return True when a model exists and is neither ANY nor MIXED."""
        if not self._model:
            return False
        content_kind = self._model[0]
        return content_kind not in (expat.model.XML_CTYPE_ANY,
                                    expat.model.XML_CTYPE_MIXED)

    def isEmpty(self):
        """Return True when a model exists and its kind is EMPTY."""
        if not self._model:
            return False
        return self._model[0] == expat.model.XML_CTYPE_EMPTY

    def isId(self, aname):
        """Return True when the first declaration of *aname* has type "ID"."""
        match = next(
            (entry for entry in self._attr_info if entry[1] == aname), None)
        return match is not None and match[-2] == "ID"

    def isIdNS(self, euri, ename, auri, aname):
        """Delegate to isId() using the pair (auri, aname) as the name."""
        # not sure this is meaningful
        key = (auri, aname)
        return self.isId(key)
1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _intern(builder, s):
    """Return the canonical copy of *s* from the builder's intern table.

    Calls ``builder._intern_setdefault(s, s)``: *s* is stored under itself
    on first sight and the stored object is returned afterwards.
    """
    setdefault = builder._intern_setdefault
    return setdefault(s, s)
1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _parse_ns_name(builder, name):
    """Split a space-separated namespace name into its DOM components.

    *name* must have the form "uri localname" or "uri localname prefix"
    (presumably as reported by an Expat parser created with " " as its
    namespace separator -- confirm against the caller).  Every returned
    string is interned through ``builder._intern_setdefault``.

    Returns a 4-tuple ``(uri, localname, prefix, qname)``.  Without a
    prefix, *prefix* is EMPTY_PREFIX and *qname* equals *localname*;
    with one, *qname* is "prefix:localname".

    Raises ValueError when *name* does not split into exactly two or
    three parts, e.g. when the URI itself contains a space.
    """
    assert ' ' in name
    parts = name.split(' ')
    intern = builder._intern_setdefault
    if len(parts) == 3:
        uri, localname, prefix = parts
        prefix = intern(prefix, prefix)
        qname = "%s:%s" % (prefix, localname)
        qname = intern(qname, qname)
        localname = intern(localname, localname)
    elif len(parts) == 2:
        uri, localname = parts
        prefix = EMPTY_PREFIX
        qname = localname = intern(localname, localname)
    else:
        # Previously this fell into the two-part unpacking above and
        # failed with an opaque "too many values to unpack" ValueError.
        raise ValueError(
            "Unsupported syntax: spaces in URIs not supported: %r" % name)
    return intern(uri, uri), localname, prefix, qname
1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ExpatBuilder:
1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Document builder that uses Expat to build a ParsedXML.DOM document
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    instance."""
1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, options=None):
1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if options is None:
1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            options = xmlbuilder.Options()
1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._options = options
1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._options.filter is not None:
1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._filter = FilterVisibilityController(self._options.filter)
1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._filter = None
1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # This *really* doesn't do anything in this case, so
1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # override it with something fast & minimal.
1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._finish_start_element = id
1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._parser = None
1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.reset()
1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def createParser(self):
1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Create a new parser object."""
1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return expat.ParserCreate()
1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getParser(self):
1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the parser object, creating a new one if needed."""
1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not self._parser:
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser = self.createParser()
1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._intern_setdefault = self._parser.intern.setdefault
1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.buffer_text = True
1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.ordered_attributes = True
1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.specified_attributes = True
1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.install(self._parser)
1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._parser
1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def reset(self):
1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Free all data structures used during DOM construction."""
1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.document = theDOMImplementation.createDocument(
1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            EMPTY_NAMESPACE, None, None)
1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.curNode = self.document
1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._elem_info = self.document._elem_info
1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._cdata = False
1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def install(self, parser):
1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Install the callbacks needed to build the DOM into the parser."""
1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # This creates circular references!
1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.StartElementHandler = self.first_element_handler
1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.EndElementHandler = self.end_element_handler
1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.ProcessingInstructionHandler = self.pi_handler
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._options.entities:
1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.EntityDeclHandler = self.entity_decl_handler
1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.NotationDeclHandler = self.notation_decl_handler
1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._options.comments:
1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.CommentHandler = self.comment_handler
1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._options.cdata_sections:
1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.StartCdataSectionHandler = self.start_cdata_section_handler
1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.EndCdataSectionHandler = self.end_cdata_section_handler
1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.CharacterDataHandler = self.character_data_handler_cdata
1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.CharacterDataHandler = self.character_data_handler
1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.XmlDeclHandler = self.xml_decl_handler
1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.ElementDeclHandler = self.element_decl_handler
1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser.AttlistDeclHandler = self.attlist_decl_handler
1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def parseFile(self, file):
1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Parse a document from a file object, returning the document
1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node."""
2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser = self.getParser()
2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        first_buffer = True
2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            while 1:
2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                buffer = file.read(16*1024)
2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if not buffer:
2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    break
2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parser.Parse(buffer, 0)
2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if first_buffer and self.document.documentElement:
2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._setup_subset(buffer)
2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                first_buffer = False
2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.Parse("", True)
2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ParseEscape:
2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        doc = self.document
2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.reset()
2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._parser = None
2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return doc
2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def parseString(self, string):
2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Parse a document from a string, returning the document node."""
2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser = self.getParser()
2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            parser.Parse(string, True)
2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._setup_subset(string)
2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ParseEscape:
2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        doc = self.document
2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.reset()
2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._parser = None
2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return doc
2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _setup_subset(self, buffer):
2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Load the internal subset if there might be one."""
2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.document.doctype:
2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            extractor = InternalSubsetExtractor()
2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            extractor.parseString(buffer)
2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            subset = extractor.getSubset()
2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.document.doctype.internalSubset = subset
2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                   has_internal_subset):
2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        doctype = self.document.implementation.createDocumentType(
2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            doctypeName, publicId, systemId)
2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        doctype.ownerDocument = self.document
2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _append_child(self.document, doctype)
2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.document.doctype = doctype
2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.document.doctype = None
2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            del self.document.childNodes[-1]
2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            doctype = None
2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.EntityDeclHandler = None
2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.NotationDeclHandler = None
2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if has_internal_subset:
2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if doctype is not None:
2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                doctype.entities._seq = []
2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                doctype.notations._seq = []
2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.CommentHandler = None
2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.ProcessingInstructionHandler = None
2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def end_doctype_decl_handler(self):
2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._options.comments:
2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.CommentHandler = self.comment_handler
2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._parser.ProcessingInstructionHandler = self.pi_handler
2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not (self._elem_info or self._filter):
2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._finish_end_element = id
2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def pi_handler(self, target, data):
2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node = self.document.createProcessingInstruction(target, data)
2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _append_child(self.curNode, node)
2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.curNode.removeChild(node)
2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def character_data_handler_cdata(self, data):
2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        childNodes = self.curNode.childNodes
2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._cdata:
2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if (  self._cdata_continue
2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                  and childNodes[-1].nodeType == CDATA_SECTION_NODE):
2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                childNodes[-1].appendData(data)
2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node = self.document.createCDATASection(data)
2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._cdata_continue = True
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node = childNodes[-1]
2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            value = node.data + data
2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d = node.__dict__
2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d['data'] = d['nodeValue'] = value
2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node = minidom.Text()
2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d = node.__dict__
2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d['data'] = d['nodeValue'] = data
2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d['ownerDocument'] = self.document
2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _append_child(self.curNode, node)
2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def character_data_handler(self, data):
2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        childNodes = self.curNode.childNodes
2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node = childNodes[-1]
3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d = node.__dict__
3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            d['data'] = d['nodeValue'] = node.data + data
3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node = minidom.Text()
3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        d = node.__dict__
3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        d['data'] = d['nodeValue'] = node.data + data
3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        d['ownerDocument'] = self.document
3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _append_child(self.curNode, node)
3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def entity_decl_handler(self, entityName, is_parameter_entity, value,
3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            base, systemId, publicId, notationName):
3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if is_parameter_entity:
3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # we don't care about parameter entities for the DOM
3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not self._options.entities:
3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node = self.document._create_entity(entityName, publicId,
3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                            systemId, notationName)
3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if value is not None:
3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # internal entity
3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # node *should* be readonly, but we'll cheat
3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            child = self.document.createTextNode(value)
3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node.childNodes.append(child)
3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.document.doctype.entities._seq.append(node)
3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            del self.document.doctype.entities._seq[-1]
3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def notation_decl_handler(self, notationName, base, systemId, publicId):
3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node = self.document._create_notation(notationName, publicId, systemId)
3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.document.doctype.notations._seq.append(node)
3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT:
3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            del self.document.doctype.notations._seq[-1]
3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def comment_handler(self, data):
3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node = self.document.createComment(data)
3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _append_child(self.curNode, node)
3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.curNode.removeChild(node)
3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def start_cdata_section_handler(self):
3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._cdata = True
3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._cdata_continue = False
3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def end_cdata_section_handler(self):
3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._cdata = False
3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._cdata_continue = False
3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def external_entity_ref_handler(self, context, base, systemId, publicId):
3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return 1
3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def first_element_handler(self, name, attributes):
3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter is None and not self._elem_info:
3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._finish_end_element = id
3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.getParser().StartElementHandler = self.start_element_handler
3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.start_element_handler(name, attributes)
3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def start_element_handler(self, name, attributes):
3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        node = self.document.createElement(name)
3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _append_child(self.curNode, node)
3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.curNode = node
3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if attributes:
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for i in range(0, len(attributes), 2):
3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                 None, EMPTY_PREFIX)
3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                value = attributes[i+1]
3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                d = a.childNodes[0].__dict__
3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                d['data'] = d['nodeValue'] = value
3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                d = a.__dict__
3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                d['value'] = d['nodeValue'] = value
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                d['ownerDocument'] = self.document
3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                _set_attribute_node(node, a)
3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if node is not self.document.documentElement:
3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._finish_start_element(node)
3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _finish_start_element(self, node):
3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter:
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # To be general, we'd have to call isSameNode(), but this
3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # is sufficient for minidom:
3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if node is self.document.documentElement:
3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            filt = self._filter.startContainer(node)
3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if filt == FILTER_REJECT:
3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # ignore this node & all descendents
3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                Rejecter(self)
3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif filt == FILTER_SKIP:
3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # ignore this node, but make it's children become
3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # children of the parent node
3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                Skipper(self)
3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.curNode = node.parentNode
3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node.parentNode.removeChild(node)
3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node.unlink()
3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # If this ever changes, Namespaces.end_element_handler() needs to
3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # be changed to match.
3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def end_element_handler(self, name):
4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        curNode = self.curNode
4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.curNode = curNode.parentNode
4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._finish_end_element(curNode)
4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _finish_end_element(self, curNode):
4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = self._elem_info.get(curNode.tagName)
4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if info:
4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._handle_white_text_nodes(curNode, info)
4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._filter:
4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if curNode is self.document.documentElement:
4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self._filter.acceptNode(curNode) == FILTER_REJECT:
4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.curNode.removeChild(curNode)
4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                curNode.unlink()
4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _handle_white_text_nodes(self, node, info):
4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if (self._options.whitespace_in_element_content
4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            or not info.isElementContent()):
4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # We have element type information and should remove ignorable
4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # whitespace; identify for text nodes which contain only
4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # whitespace.
4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        L = []
4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for child in node.childNodes:
4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if child.nodeType == TEXT_NODE and not child.data.strip():
4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                L.append(child)
4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Remove ignorable whitespace from the tree.
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for child in L:
4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            node.removeChild(child)
4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def element_decl_handler(self, name, model):
4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = self._elem_info.get(name)
4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if info is None:
4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._elem_info[name] = ElementInfo(name, model)
4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            assert info._model is None
4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            info._model = model
4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def attlist_decl_handler(self, elem, name, type, default, required):
4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = self._elem_info.get(elem)
4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if info is None:
4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            info = ElementInfo(elem)
4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._elem_info[elem] = info
4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info._attr_info.append(
4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            [None, name, None, None, default, 0, type, required])
4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def xml_decl_handler(self, version, encoding, standalone):
4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.document.version = version
4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.document.encoding = encoding
4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # This is still a little ugly, thanks to the pyexpat API. ;-(
4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if standalone >= 0:
4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if standalone:
4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.document.standalone = True
4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.document.standalone = False
4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Values a filter callback may legitimately return.  FILTER_INTERRUPT
# is deliberately not included, since that's checked separately where
# allowed.
_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)
4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class FilterVisibilityController(object):
    """Wrapper around a DOMBuilderFilter which implements the checks
    to make the whatToShow filter attribute work."""

    __slots__ = 'filter',

    def __init__(self, filter):
        """Remember the wrapped filter object."""
        self.filter = filter

    def startContainer(self, node):
        """Ask the wrapped filter about a container node being started.

        The filter is consulted only when its whatToShow mask includes
        the bit for the node's type; otherwise FILTER_ACCEPT is returned
        without calling it.

        Raises ParseEscape if the filter returns FILTER_INTERRUPT and
        ValueError if it returns anything else that is not one of
        _ALLOWED_FILTER_RETURNS.
        """
        mask = self._nodetype_mask[node.nodeType]
        if self.filter.whatToShow & mask:
            val = self.filter.startContainer(node)
            if val == FILTER_INTERRUPT:
                raise ParseEscape
            if val not in _ALLOWED_FILTER_RETURNS:
                # Call form of raise: valid on both Python 2 and 3,
                # unlike the old "raise Class, message" statement.
                raise ValueError(
                    "startContainer() returned illegal value: " + repr(val))
            return val
        else:
            return FILTER_ACCEPT

    def acceptNode(self, node):
        """Ask the wrapped filter whether a completed node is accepted.

        The filter is consulted only when its whatToShow mask includes
        the bit for the node's type; otherwise FILTER_ACCEPT is returned
        without calling it.  A FILTER_SKIP answer is carried out here by
        moving the node's children to its parent, and is then reported
        to the caller as FILTER_REJECT.

        Raises ParseEscape if the filter returns FILTER_INTERRUPT and
        ValueError if it returns anything else that is not one of
        _ALLOWED_FILTER_RETURNS.
        """
        mask = self._nodetype_mask[node.nodeType]
        if self.filter.whatToShow & mask:
            val = self.filter.acceptNode(node)
            if val == FILTER_INTERRUPT:
                raise ParseEscape
            if val == FILTER_SKIP:
                # move all child nodes to the parent, and remove this node
                parent = node.parentNode
                # iterate over a copy: appendChild() changes childNodes
                for child in node.childNodes[:]:
                    parent.appendChild(child)
                # node is handled by the caller
                return FILTER_REJECT
            if val not in _ALLOWED_FILTER_RETURNS:
                raise ValueError(
                    "acceptNode() returned illegal value: " + repr(val))
            return val
        else:
            return FILTER_ACCEPT

    # Maps each node type to the whatToShow bit that selects it.
    _nodetype_mask = {
        Node.ELEMENT_NODE:                NodeFilter.SHOW_ELEMENT,
        Node.ATTRIBUTE_NODE:              NodeFilter.SHOW_ATTRIBUTE,
        Node.TEXT_NODE:                   NodeFilter.SHOW_TEXT,
        Node.CDATA_SECTION_NODE:          NodeFilter.SHOW_CDATA_SECTION,
        Node.ENTITY_REFERENCE_NODE:       NodeFilter.SHOW_ENTITY_REFERENCE,
        Node.ENTITY_NODE:                 NodeFilter.SHOW_ENTITY,
        Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
        Node.COMMENT_NODE:                NodeFilter.SHOW_COMMENT,
        Node.DOCUMENT_NODE:               NodeFilter.SHOW_DOCUMENT,
        Node.DOCUMENT_TYPE_NODE:          NodeFilter.SHOW_DOCUMENT_TYPE,
        Node.DOCUMENT_FRAGMENT_NODE:      NodeFilter.SHOW_DOCUMENT_FRAGMENT,
        Node.NOTATION_NODE:               NodeFilter.SHOW_NOTATION,
        }
5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class FilterCrutch(object):
    """Base for helpers that temporarily take over a builder's parser.

    On construction the parser's current start/end element handlers are
    saved and replaced by this object's own start_element_handler and
    end_element_handler, which subclasses are expected to provide.
    """

    __slots__ = '_builder', '_level', '_old_start', '_old_end'

    def __init__(self, builder):
        parser = builder._parser
        self._builder = builder
        # nesting depth, starts at zero
        self._level = 0
        # keep the displaced handlers so they can be put back later
        self._old_start, self._old_end = (parser.StartElementHandler,
                                          parser.EndElementHandler)
        parser.StartElementHandler = self.start_element_handler
        parser.EndElementHandler = self.end_element_handler
5320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class Rejecter(FilterCrutch):
    """Crutch that swallows an element and everything nested in it.

    While active, the content callbacks on the parser are cleared and
    element start/end events only adjust a nesting counter.
    """

    __slots__ = ()

    def __init__(self, builder):
        FilterCrutch.__init__(self, builder)
        parser = builder._parser
        # Clear every content callback for the duration of the rejection.
        parser.ProcessingInstructionHandler = None
        parser.CommentHandler = None
        parser.CharacterDataHandler = None
        parser.StartCdataSectionHandler = None
        parser.EndCdataSectionHandler = None
        parser.ExternalEntityRefHandler = None

    def start_element_handler(self, *args):
        """Count one more level of nesting; the element is ignored."""
        self._level += 1

    def end_element_handler(self, *args):
        """Leave one nesting level, or finish when back at level zero."""
        if self._level:
            self._level -= 1
            return
        # Back at the rejected element's own end tag: let the builder
        # reinstall its handlers, then restore the two saved ones.
        parser = self._builder._parser
        self._builder.install(parser)
        parser.StartElementHandler = self._old_start
        parser.EndElementHandler = self._old_end
5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class Skipper(FilterCrutch):
    """Crutch that drops one element but keeps building its content.

    Element events are forwarded to the handlers that were installed
    before, while a nesting counter detects the skipped element's own
    end tag.
    """

    __slots__ = ()

    def start_element_handler(self, *args):
        """Forward the event; count a level if the current node changed."""
        before = self._builder.curNode
        self._old_start(*args)
        if self._builder.curNode is not before:
            self._level += 1

    def end_element_handler(self, *args):
        """Forward nested end tags; at level zero just restore handlers."""
        if self._level != 0:
            self._level -= 1
            self._old_end(*args)
            return
        # We're popping back out of the node we're skipping, so we
        # shouldn't need to do anything but reset the handlers.
        parser = self._builder._parser
        parser.StartElementHandler = self._old_start
        parser.EndElementHandler = self._old_end
        self._builder = None
5800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Framework document used by the fragment builder.
# The template is filled in two steps.  The single "%s" is replaced
# right here with the internal system identifier, which turns each
# "%%s" into a plain "%s".  Those remaining slots take, in order, a
# string for the doctype, the subset string, and the namespace attrs
# string.

# System identifier of the entity that stands in for the fragment text.
_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
    "http://xml.python.org/entities/fragment-builder/internal"

_FRAGMENT_BUILDER_TEMPLATE = (
    '''\
<!DOCTYPE wrapper
  %%s [
  <!ENTITY fragment-builder-internal
    SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
    % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class FragmentBuilder(ExpatBuilder):
    """Builder which constructs document fragments given XML source
    text and a context node.

    The context node is expected to provide information about the
    namespace declarations which are in scope at the start of the
    fragment.
    """

    def __init__(self, context, options=None):
        """Remember the context node and the document it belongs to.

        ``context`` may be the Document itself or any node that has an
        ownerDocument.
        """
        if context.nodeType == DOCUMENT_NODE:
            self.originalDocument = context
        else:
            self.originalDocument = context.ownerDocument
        self.context = context
        ExpatBuilder.__init__(self, options)

    def reset(self):
        """Reset the builder and forget any fragment built so far."""
        ExpatBuilder.reset(self)
        self.fragment = None

    def parseFile(self, file):
        """Parse a document fragment from a file object, returning the
        fragment node."""
        return self.parseString(file.read())

    def parseString(self, string):
        """Parse a document fragment from a string, returning the
        fragment node.

        The text is not parsed directly.  A wrapper document is built
        from _FRAGMENT_BUILDER_TEMPLATE, and the fragment text is read
        when the parser reaches the wrapper's internal entity reference
        (see external_entity_ref_handler()).  The builder is reset
        whether or not parsing succeeds; parser errors propagate.
        """
        self._source = string
        parser = self.getParser()
        doctype = self.originalDocument.doctype
        ident = ""
        if doctype:
            subset = doctype.internalSubset or self._getDeclarations()
            if doctype.publicId:
                ident = ('PUBLIC "%s" "%s"'
                         % (doctype.publicId, doctype.systemId))
            elif doctype.systemId:
                ident = 'SYSTEM "%s"' % doctype.systemId
        else:
            subset = ""
        nsattrs = self._getNSattrs() # get ns decls from node's ancestors
        document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
        try:
            parser.Parse(document, 1)
            # grab the result before reset() clears self.fragment
            fragment = self.fragment
        finally:
            self.reset()
        return fragment

    def _getDeclarations(self):
        """Re-create the internal subset from the DocumentType node.

        This is only needed if we don't already have the
        internalSubset as a string.  Returns the notation and entity
        declarations joined by a newline plus two spaces, or an empty
        string when there is no doctype or nothing to declare.
        """
        # Use originalDocument rather than context.ownerDocument: when
        # the context node is the Document itself, its ownerDocument is
        # None and has no doctype attribute.
        doctype = self.originalDocument.doctype
        if not doctype:
            return ""
        declarations = []
        notations = doctype.notations
        for i in range(notations.length):
            notation = notations.item(i)
            if notation.publicId:
                if notation.systemId:
                    external_id = ('PUBLIC "%s"\n             "%s"'
                                   % (notation.publicId, notation.systemId))
                else:
                    # A notation may be declared with a public
                    # identifier only; don't emit a "None" literal.
                    external_id = 'PUBLIC "%s"' % notation.publicId
            else:
                external_id = 'SYSTEM "%s"' % notation.systemId
            declarations.append(
                "<!NOTATION %s %s>" % (notation.nodeName, external_id))
        entities = doctype.entities
        for i in range(entities.length):
            entity = entities.item(i)
            decl = "<!ENTITY %s" % entity.nodeName
            if entity.publicId:
                decl = ('%s PUBLIC "%s"\n             "%s"'
                        % (decl, entity.publicId, entity.systemId))
            elif entity.systemId:
                decl = '%s SYSTEM "%s"' % (decl, entity.systemId)
            else:
                decl = '%s "%s"' % (decl, entity.firstChild.data)
            if entity.notationName:
                # XML names the notation of an unparsed entity with the
                # NDATA keyword.
                decl = "%s NDATA %s" % (decl, entity.notationName)
            declarations.append(decl + ">")
        return "\n  ".join(declarations)

    def _getNSattrs(self):
        """Return the namespace attribute text for the wrapper element.

        This builder always returns an empty string.
        """
        return ""

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        """Parse the fragment source when the wrapper's entity is hit.

        For the internal fragment-builder system identifier, a
        sub-parser is created, the real document is swapped in, and the
        saved source text is parsed into a new DocumentFragment stored
        on ``self.fragment``.  The previous document and current node
        are restored afterwards, even on error.  Any other entity is
        passed on to ExpatBuilder.
        """
        if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
            # this entref is the one that we made to put the subtree
            # in; all of our given input is parsed in here.
            old_document = self.document
            old_cur_node = self.curNode
            parser = self._parser.ExternalEntityParserCreate(context)
            # put the real document back, parse into the fragment to return
            self.document = self.originalDocument
            self.fragment = self.document.createDocumentFragment()
            self.curNode = self.fragment
            try:
                parser.Parse(self._source, 1)
            finally:
                self.curNode = old_cur_node
                self.document = old_document
                self._source = None
            # NOTE(review): the -1 result is kept as-is; what it means
            # to the parser is not evident from this file.
            return -1
        else:
            return ExpatBuilder.external_entity_ref_handler(
                self, context, base, systemId, publicId)
7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class Namespaces:
    """Mix-in for builder classes that layers namespace handling on top.

    The handlers here read self.document, self.curNode and
    self._options and chain to ExpatBuilder.install(); none of that
    state is created by this class itself.
    """

    def _initNamespaces(self):
        # Pending (prefix, uri) declarations in the order they were
        # reported.  start_element_handler() turns them into xmlns
        # attributes on the next element and then empties the list.
        self._ns_ordered_prefixes = []

    def createParser(self):
        """Create a new namespace-handling parser."""
        # The separator chosen here is a single space; the element
        # handlers below look for it to recognise qualified names.
        ns_parser = expat.ParserCreate(namespace_separator=" ")
        ns_parser.namespace_prefixes = True
        return ns_parser

    def install(self, parser):
        """Insert the namespace-handlers onto the parser."""
        ExpatBuilder.install(self, parser)
        if not self._options.namespace_declarations:
            return
        parser.StartNamespaceDeclHandler = self.start_namespace_decl_handler

    def start_namespace_decl_handler(self, prefix, uri):
        """Push this namespace declaration on our storage."""
        declaration = (prefix, uri)
        self._ns_ordered_prefixes.append(declaration)

    def start_element_handler(self, name, attributes):
        """Create the Element for `name`, append it to the current node
        and make it current, then attach any pending namespace
        declarations followed by the element's own attributes.

        `attributes` is read as a flat sequence of alternating
        attribute names and values.
        """
        if ' ' not in name:
            elem_uri = EMPTY_NAMESPACE
            elem_prefix = EMPTY_PREFIX
            elem_local = None
            elem_qname = name
        else:
            elem_uri, elem_local, elem_prefix, elem_qname = \
                _parse_ns_name(self, name)
        node = minidom.Element(elem_qname, elem_uri, elem_prefix, elem_local)
        document = self.document
        node.ownerDocument = document
        _append_child(self.curNode, node)
        self.curNode = node

        pending = self._ns_ordered_prefixes
        if pending:
            for ns_prefix, ns_uri in pending:
                if not ns_prefix:
                    # No prefix: this is a plain "xmlns" declaration.
                    a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
                                     "xmlns", EMPTY_PREFIX)
                else:
                    a = minidom.Attr(_intern(self, 'xmlns:' + ns_prefix),
                                     XMLNS_NAMESPACE, ns_prefix, "xmlns")
                # Values are stored straight into the instance
                # dictionaries of the attribute and of its first child
                # node instead of going through attribute assignment.
                text_state = a.childNodes[0].__dict__
                text_state['data'] = text_state['nodeValue'] = ns_uri
                attr_state = a.__dict__
                attr_state['value'] = attr_state['nodeValue'] = ns_uri
                attr_state['ownerDocument'] = document
                _set_attribute_node(node, a)
            # Empty the shared list in place so the declarations are
            # not repeated on the next element.
            del pending[:]

        if attributes:
            by_qname = node._attrs
            by_ns_name = node._attrsNS
            for pos in range(0, len(attributes), 2):
                aname = attributes[pos]
                value = attributes[pos + 1]
                if ' ' not in aname:
                    a = minidom.Attr(aname, EMPTY_NAMESPACE,
                                     aname, EMPTY_PREFIX)
                    by_qname[aname] = a
                    by_ns_name[(EMPTY_NAMESPACE, aname)] = a
                else:
                    uri, localname, prefix, qname = _parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    by_qname[qname] = a
                    by_ns_name[(uri, localname)] = a
                # Same direct dictionary writes as for the namespace
                # declarations above; the attribute is registered in
                # the element's two lookup tables by hand.
                text_state = a.childNodes[0].__dict__
                text_state['data'] = text_state['nodeValue'] = value
                attr_state = a.__dict__
                attr_state['ownerDocument'] = document
                attr_state['value'] = attr_state['nodeValue'] = value
                attr_state['ownerElement'] = node

    if __debug__:
        # This version differs from the original end_element_handler()
        # only by its assertions, so it is defined solely when -O is
        # not in effect.  When changing either one, check whether the
        # other needs the same change.
        #
        def end_element_handler(self, name):
            finished = self.curNode
            if ' ' not in name:
                assert finished.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert finished.namespaceURI == EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            else:
                uri, localname, prefix, qname = _parse_ns_name(self, name)
                assert (finished.namespaceURI == uri
                        and finished.localName == localname
                        and finished.prefix == prefix), \
                        "element stack messed up! (namespace)"
            self.curNode = finished.parentNode
            self._finish_end_element(finished)
8160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class ExpatBuilderNS(Namespaces, ExpatBuilder):
    """Document builder that supports namespaces."""

    def reset(self):
        """Reset the ExpatBuilder state, then start over with an empty
        list of pending namespace declarations."""
        ExpatBuilder.reset(self)
        self._initNamespaces()
8240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class FragmentBuilderNS(Namespaces, FragmentBuilder):
    """Fragment builder that supports namespaces."""

    def reset(self):
        """Reset the FragmentBuilder state, then start over with an
        empty list of pending namespace declarations."""
        FragmentBuilder.reset(self)
        self._initNamespaces()

    def _getNSattrs(self):
        """Return string of namespace attributes from this element and
        ancestors.

        The walk starts at self.context and follows parentNode links.
        Only nodes carrying a `_ns_prefix_uri` mapping contribute, and
        a prefix already seen closer to the context is not repeated.
        The result is "" when nothing was found; otherwise it starts
        with a space and puts each further declaration on its own line.
        """
        # XXX This should be rewritten to walk the context's ancestors
        # and derive the namespace information from the declarations,
        # elements and attributes found there.  The alternative is to
        # keep a good deal more data on the DOM (which *might* be more
        # reliable -- that is not clear).
        seen = set()
        declarations = []
        node = self.context
        while node:
            if hasattr(node, '_ns_prefix_uri'):
                for prefix, uri in node._ns_prefix_uri.items():
                    # only the first declaration met for a prefix counts
                    if prefix in seen:
                        continue
                    seen.add(prefix)
                    if prefix:
                        declname = "xmlns:" + prefix
                    else:
                        declname = "xmlns"
                    declarations.append("%s='%s'" % (declname, uri))
            node = node.parentNode
        if not declarations:
            return ""
        return " " + "\n    ".join(declarations)
8610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class ParseEscape(Exception):
    """Raised by InternalSubsetExtractor's handlers to abandon a parse
    early; its parseFile() and parseString() catch and discard it."""
8660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class InternalSubsetExtractor(ExpatBuilder):
    """XML processor which can rip out the internal document type subset."""

    # None until a doctype with an internal subset is reached; a list
    # of collected pieces while inside that subset; the joined string
    # once the doctype declaration has ended.
    subset = None

    def getSubset(self):
        """Return the internal subset as a string."""
        return self.subset

    def parseFile(self, file):
        """Parse `file` only as far as needed; ParseEscape marks the
        intended early stop and is swallowed here."""
        try:
            ExpatBuilder.parseFile(self, file)
        except ParseEscape:
            pass

    def parseString(self, string):
        """Parse `string` only as far as needed; ParseEscape marks the
        intended early stop and is swallowed here."""
        try:
            ExpatBuilder.parseString(self, string)
        except ParseEscape:
            pass

    def install(self, parser):
        # Only these two handlers are set; ExpatBuilder.install() is
        # not called from here.
        parser.StartElementHandler = self.start_element_handler
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler

    def start_doctype_decl_handler(self, name, publicId, systemId,
                                   has_internal_subset):
        if not has_internal_subset:
            # Nothing to extract: stop right away.
            raise ParseEscape()
        parser = self.getParser()
        pieces = self.subset = []
        # From here on every chunk passed to the default handler is
        # collected, until the end of the doctype declaration.
        parser.DefaultHandler = pieces.append
        parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        collected = ''.join(self.subset)
        # Turn CRLF and lone CR line ends into LF.
        self.subset = collected.replace('\r\n', '\n').replace('\r', '\n')
        raise ParseEscape()

    def start_element_handler(self, name, attrs):
        # The first element was reached, so there is nothing more to
        # look for.
        raise ParseEscape()
9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def parse(file, namespaces=True):
    """Parse a document, returning the resulting Document node.

    'file' may be either a file name or an open file object.  A file
    name is opened in binary mode and closed again once parsing is
    done; an open file object is left for the caller to close.
    'namespaces' selects the namespace-aware builder when true.
    """
    builder_class = ExpatBuilder
    if namespaces:
        builder_class = ExpatBuilderNS
    builder = builder_class()

    if not isinstance(file, StringTypes):
        return builder.parseFile(file)

    fp = open(file, 'rb')
    try:
        return builder.parseFile(fp)
    finally:
        fp.close()
9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def parseString(string, namespaces=True):
    """Parse a document from a string, returning the resulting
    Document node.

    'namespaces' selects the namespace-aware builder when true.
    """
    builder_class = ExpatBuilder
    if namespaces:
        builder_class = ExpatBuilderNS
    return builder_class().parseString(string)
9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def parseFragment(file, context, namespaces=True):
    """Parse a fragment of a document, given the context from which it
    was originally extracted.  context should be the parent of the
    node(s) which are in the fragment.

    'file' may be either a file name or an open file object.  A file
    name is opened in binary mode and closed again once parsing is
    done; an open file object is left for the caller to close.
    """
    builder_class = FragmentBuilder
    if namespaces:
        builder_class = FragmentBuilderNS
    builder = builder_class(context)

    if not isinstance(file, StringTypes):
        return builder.parseFile(file)

    fp = open(file, 'rb')
    try:
        return builder.parseFile(fp)
    finally:
        fp.close()
9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def parseFragmentString(string, context, namespaces=True):
    """Parse a fragment of a document from a string, given the context
    from which it was originally extracted.  context should be the
    parent of the node(s) which are in the fragment.

    'namespaces' selects the namespace-aware fragment builder when true.
    """
    builder_class = FragmentBuilder
    if namespaces:
        builder_class = FragmentBuilderNS
    return builder_class(context).parseString(string)
9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def makeBuilder(options):
    """Create a builder based on an Options object.

    A true options.namespaces selects the namespace-aware builder;
    either way the Options object is handed on to the builder.
    """
    builder_class = ExpatBuilder
    if options.namespaces:
        builder_class = ExpatBuilderNS
    return builder_class(options)
984