expatreader.py revision 1aa2c0f073bdbed4fa824591d53e20bbf3d01add
"""
SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""
545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drakeversion = "0.20"
745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
8fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax._exceptions import *
9fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_validation, feature_namespaces
10fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_namespace_prefixes
11fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_external_ges, feature_external_pes
12fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_string_interning
13fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import property_xml_string, property_interning_dict
14fbdeaad06910a50d6f05da177949b9a451a1132aFred Drake
# xml.parsers.expat does not raise ImportError on Jython, so that platform
# is rejected explicitly before the import is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Report a missing expat module as "reader not available" instead of
# letting the ImportError escape.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)

# The import worked; make sure the module can actually create parsers.
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
28fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax import xmlreader, saxutils, handler
2945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# Module-level aliases for the two attribute container classes.
AttributesNSImpl = xmlreader.AttributesNSImpl
AttributesImpl = xmlreader.AttributesImpl

# _mkproxy() is what the locator uses to hold its parser.  If the
# interpreter has weak reference support it is weakref.proxy, which avoids
# a cycle between the parser and the content handler; otherwise it just
# hands back the object it was given.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    from weakref import proxy as _mkproxy
    del _weakref
45012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake
46012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake# --- ExpatLocator
47012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake
class ExpatLocator(xmlreader.Locator):
    """Document locator handed to the content handler by ExpatParser.

    The parser is held through _mkproxy() rather than directly so that,
    where weak references are available, the locator does not create a
    circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public id of the parser's current input source."""
        owner = self._ref
        # NOTE(review): a weakref proxy is never None itself, so this guard
        # only triggers when the stored reference really is None -- confirm
        # whether a dead proxy should be handled here as well.
        if owner is not None:
            return owner._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system id of the parser's current input source."""
        owner = self._ref
        if owner is not None:
            return owner._source.getSystemId()
        return None
80012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake
813f0969f100a565a239f3504b50ab8e31d6e81b14Martin v. Löwis
8245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# --- ExpatParser
8345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
84ddb486745bbcb912eee2e84791273fa0a8e3c9e2Fred Drakeclass ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
853f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis    """SAX driver for the pyexpat C module."""
8645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
8745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
8845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        xmlreader.IncrementalParser.__init__(self, bufsize)
89e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._source = xmlreader.InputSource()
9045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser = None
9145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._namespaces = namespaceHandling
920591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        self._lex_handler_prop = None
9345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parsing = 0
94e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._entity_stack = []
9518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        self._external_ges = 1
9618476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        self._interning = None
9745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
9845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # XMLReader methods
9945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
100523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel    def parse(self, source):
101bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbel        "Parse an XML document from a URL or an InputSource."
102523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel        source = saxutils.prepare_input_source(source)
103523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
104523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel        self._source = source
10545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self.reset()
106012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake        self._cont_handler.setDocumentLocator(ExpatLocator(self))
10716f6329e6153c4b92f2175a5560e372a762befe6Fred Drake        xmlreader.IncrementalParser.parse(self, source)
10845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
109523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel    def prepareParser(self, source):
1105b63acd31e0e40c1a9a9e9762905b0054ff37994Benjamin Peterson        if source.getSystemId() is not None:
1118673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka            base = source.getSystemId()
1128673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka            if isinstance(base, unicode):
1138673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka                base = base.encode('utf-8')
1148673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka            self._parser.SetBase(base)
11516f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
1163f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis    # Redefined setContentHandler to allow changing handlers during parsing
117fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
118fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis    def setContentHandler(self, handler):
119fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        xmlreader.IncrementalParser.setContentHandler(self, handler)
120fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        if self._parsing:
121fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis            self._reset_cont_handler()
122fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
12345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getFeature(self, name):
12418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        if name == feature_namespaces:
125f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            return self._namespaces
12618476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_string_interning:
12718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return self._interning is not None
12818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name in (feature_validation, feature_external_pes,
12918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                      feature_namespace_prefixes):
13018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return 0
13118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_external_ges:
13218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return self._external_ges
13345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
13445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
13545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def setFeature(self, name, state):
136f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        if self._parsing:
137f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            raise SAXNotSupportedException("Cannot set features while parsing")
13818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis
13918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        if name == feature_namespaces:
140f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            self._namespaces = state
14118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_external_ges:
14218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            self._external_ges = state
14318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_string_interning:
14418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
14518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                if self._interning is None:
14618476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                    self._interning = {}
14718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            else:
14818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                self._interning = None
14918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_validation:
15018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
1513f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1523f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "expat does not support validation")
15318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_external_pes:
15418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
1553f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1563f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "expat does not read external parameter entities")
15718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_namespace_prefixes:
15818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
1593f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1603f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "expat does not report namespace prefixes")
161f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        else:
1623f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            raise SAXNotRecognizedException(
1633f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                "Feature '%s' not recognized" % name)
16445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
16545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getProperty(self, name):
1660591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if name == handler.property_lexical_handler:
1670591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            return self._lex_handler_prop
16818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_interning_dict:
16918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return self._interning
17018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_xml_string:
17118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if self._parser:
17218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                if hasattr(self._parser, "GetInputContext"):
17318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                    return self._parser.GetInputContext()
17418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                else:
1753f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    raise SAXNotRecognizedException(
1763f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                        "This version of expat does not support getting"
1773f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                        " the XML string")
17818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            else:
1793f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1803f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "XML string cannot be returned when not parsing")
18145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
18245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
18345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def setProperty(self, name, value):
1840591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if name == handler.property_lexical_handler:
1850591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            self._lex_handler_prop = value
186fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis            if self._parsing:
187fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis                self._reset_lex_handler_prop()
18818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_interning_dict:
18918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            self._interning = value
19018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_xml_string:
19118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            raise SAXNotSupportedException("Property '%s' cannot be set" %
19218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                                           name)
1930591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        else:
19418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            raise SAXNotRecognizedException("Property '%s' not recognized" %
19518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                                            name)
19645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
19745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # IncrementalParser methods
19845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
199ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis    def feed(self, data, isFinal = 0):
20045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if not self._parsing:
20145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self.reset()
20255b4efd034780a069c9bbf5b080a62df32f51441Lars Gustäbel            self._parsing = 1
20345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self._cont_handler.startDocument()
204f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel
205ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis        try:
206ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # The isFinal parameter is internal to the expat reader.
207ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # If it is set to true, expat will check validity of the entire
208ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # document. When feeding chunks, they are not normally final -
209ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # except when invoked from close.
210ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            self._parser.Parse(data, isFinal)
2113f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        except expat.error, e:
2123f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            exc = SAXParseException(expat.ErrorString(e.code), e, self)
2130591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            # FIXME: when to invoke error()?
21404f4943d132d0d5e9829923706a2cb07a2b0ae9fMartin v. Löwis            self._err_handler.fatalError(exc)
21545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
21645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def close(self):
2171aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka        if self._entity_stack or self._parser is None:
218ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # If we are completing an external entity, do nothing here
219ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            return
2201aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka        try:
2211aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self.feed("", isFinal = 1)
2221aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self._cont_handler.endDocument()
2231aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka        finally:
2241aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self._parsing = 0
2251aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            # break cycle created by expat handlers pointing to our methods
2261aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self._parser = None
22716f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
228fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis    def _reset_cont_handler(self):
229fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        self._parser.ProcessingInstructionHandler = \
230fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis                                    self._cont_handler.processingInstruction
231fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        self._parser.CharacterDataHandler = self._cont_handler.characters
232fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
233fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis    def _reset_lex_handler_prop(self):
2343f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        lex = self._lex_handler_prop
2353f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        parser = self._parser
2363f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        if lex is None:
2373f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.CommentHandler = None
2383f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartCdataSectionHandler = None
2393f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndCdataSectionHandler = None
2403f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartDoctypeDeclHandler = None
2413f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndDoctypeDeclHandler = None
2423f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        else:
2433f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.CommentHandler = lex.comment
2443f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartCdataSectionHandler = lex.startCDATA
2453f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndCdataSectionHandler = lex.endCDATA
2463f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartDoctypeDeclHandler = self.start_doctype_decl
2473f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndDoctypeDeclHandler = lex.endDTD
248fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
24945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def reset(self):
25045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if self._namespaces:
251593d6b311e03b745e7b736f3d72269a684359924Andrew M. Kuchling            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
2523f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                                              intern=self._interning)
2533f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            self._parser.namespace_prefixes = 1
25445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self._parser.StartElementHandler = self.start_element_ns
25545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self._parser.EndElementHandler = self.end_element_ns
25645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        else:
257593d6b311e03b745e7b736f3d72269a684359924Andrew M. Kuchling            self._parser = expat.ParserCreate(self._source.getEncoding(),
258593d6b311e03b745e7b736f3d72269a684359924Andrew M. Kuchling                                              intern = self._interning)
2596c4753f925467e5908a43d2fec6d15b76e878d42Paul Prescod            self._parser.StartElementHandler = self.start_element
2606c4753f925467e5908a43d2fec6d15b76e878d42Paul Prescod            self._parser.EndElementHandler = self.end_element
26145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
262fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        self._reset_cont_handler()
26345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
26445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.NotationDeclHandler = self.notation_decl
26545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
26645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
26770d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis
2680591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        self._decl_handler_prop = None
2690591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if self._lex_handler_prop:
270fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis            self._reset_lex_handler_prop()
27170d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis#         self._parser.DefaultHandler =
27270d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis#         self._parser.DefaultHandlerExpand =
27370d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis#         self._parser.NotStandaloneHandler =
27445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.ExternalEntityRefHandler = self.external_entity_ref
2753f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        try:
2763f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            self._parser.SkippedEntityHandler = self.skipped_entity_handler
2773f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        except AttributeError:
2783f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # This pyexpat does not support SkippedEntity
2793f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            pass
2803f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        self._parser.SetParamEntityParsing(
2813f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
282bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbel
28355b4efd034780a069c9bbf5b080a62df32f51441Lars Gustäbel        self._parsing = 0
284bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbel        self._entity_stack = []
28516f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
28645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # Locator methods
28745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
28845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getColumnNumber(self):
2890591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if self._parser is None:
2900591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            return None
29145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return self._parser.ErrorColumnNumber
29245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
29345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getLineNumber(self):
2940591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if self._parser is None:
2950591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            return 1
29645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return self._parser.ErrorLineNumber
29745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
29845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getPublicId(self):
29945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return self._source.getPublicId()
30045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
30145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getSystemId(self):
302ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis        return self._source.getSystemId()
30316f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
30445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # event handlers
30545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def start_element(self, name, attrs):
30632bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel        self._cont_handler.startElement(name, AttributesImpl(attrs))
30745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
30845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def end_element(self, name):
309f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        self._cont_handler.endElement(name)
31045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
31145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def start_element_ns(self, name, attrs):
312ab199622905b2621b2ad9abcb324fb5f124cc12fNeal Norwitz        pair = name.split()
31345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if len(pair) == 1:
3143f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # no namespace
315f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            pair = (None, name)
3163f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        elif len(pair) == 3:
3173f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            pair = pair[0], pair[1]
318d2f5a9ac4b161018945cdb5e5a26a722ae86cdb9Lars Gustäbel        else:
3193f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # default namespace
320d2f5a9ac4b161018945cdb5e5a26a722ae86cdb9Lars Gustäbel            pair = tuple(pair)
32145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
32232bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel        newattrs = {}
3233f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        qnames = {}
32432bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel        for (aname, value) in attrs.items():
3253f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parts = aname.split()
3263f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            length = len(parts)
3273f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            if length == 1:
3283f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                # no namespace
3293f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                qname = aname
33032bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel                apair = (None, aname)
3313f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            elif length == 3:
3323f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                qname = "%s:%s" % (parts[2], parts[1])
3333f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                apair = parts[0], parts[1]
33432bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel            else:
3353f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                # default namespace
3363f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                qname = parts[1]
3373f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                apair = tuple(parts)
33832bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
33932bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel            newattrs[apair] = value
3403f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            qnames[apair] = qname
34132bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
34216f6329e6153c4b92f2175a5560e372a762befe6Fred Drake        self._cont_handler.startElementNS(pair, None,
3433f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                                          AttributesNSImpl(newattrs, qnames))
34445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def end_element_ns(self, name):
    """Report the end of a namespace-aware element to the content handler.

    ``name`` is split on whitespace.  A single part means the element has
    no namespace and is reported as ``(None, name)``.  Three parts are
    reduced to the first two (the third part is dropped); any other
    count is passed through as a tuple of all parts.  The qualified name
    argument of ``endElementNS`` is always ``None``.
    """
    parts = name.split()
    count = len(parts)

    if count == 1:
        # No namespace: keep the name exactly as it was given.
        ns_name = (None, name)
    elif count == 3:
        # Three parts: only the first two are reported.
        ns_name = (parts[0], parts[1])
    else:
        ns_name = tuple(parts)

    self._cont_handler.endElementNS(ns_name, None)
35545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def processing_instruction(self, target, data):
    """Forward a processing instruction to the content handler.

    Per the original note on this method, it is not used: the call goes
    directly to the ContentHandler.
    """
    report = self._cont_handler.processingInstruction
    report(target, data)
35945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def character_data(self, data):
    """Forward character data to the content handler.

    Per the original note on this method, it is not used: the call goes
    directly to the ContentHandler.
    """
    report = self._cont_handler.characters
    report(data)
36345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def start_namespace_decl(self, prefix, uri):
    """Report the start of a prefix-to-URI mapping to the content handler."""
    handler = self._cont_handler
    handler.startPrefixMapping(prefix, uri)
36645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def end_namespace_decl(self, prefix):
    """Report the end of a prefix mapping to the content handler."""
    handler = self._cont_handler
    handler.endPrefixMapping(prefix)
36916f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
    """Report the start of a DOCTYPE declaration to the lexical handler.

    The identifiers arrive as (sysid, pubid) but ``startDTD`` is called
    with the public id first.  ``has_internal_subset`` is not used.
    """
    lexical_handler = self._lex_handler_prop
    lexical_handler.startDTD(name, pubid, sysid)
3723f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
    """Report an unparsed entity declaration to the DTD handler.

    The handler receives the public id before the system id; ``base`` is
    not used.
    """
    dtd_handler = self._dtd_handler
    dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
37545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def notation_decl(self, name, base, sysid, pubid):
    """Report a notation declaration to the DTD handler.

    The handler receives the public id before the system id; ``base`` is
    not used.
    """
    dtd_handler = self._dtd_handler
    dtd_handler.notationDecl(name, pubid, sysid)
37845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
def external_entity_ref(self, context, base, sysid, pubid):
    """Resolve and parse an external entity reference.

    When ``self._external_ges`` is false the reference is ignored and 1
    is returned without consulting the entity resolver.  Otherwise the
    entity is resolved through ``self._ent_handler``, a sub-parser is
    created from the current expat parser with ``context``, and the
    entity is parsed while the enclosing (parser, source) pair is saved
    on ``self._entity_stack``.

    ``base`` is accepted but not used.

    Returns 1 when the entity was skipped or parsed successfully, and 0
    when parsing it raised an ordinary exception.
    """
    if not self._external_ges:
        return 1

    source = self._ent_handler.resolveEntity(pubid, sysid)
    # The enclosing source's system id (or "") is passed as the base.
    source = saxutils.prepare_input_source(
        source, self._source.getSystemId() or "")

    # Save the enclosing parser state before switching to the entity.
    self._entity_stack.append((self._parser, self._source))
    self._parser = self._parser.ExternalEntityParserCreate(context)
    self._source = source

    try:
        xmlreader.IncrementalParser.parse(self, source)
    except Exception:
        # Only ordinary errors become a failure code; KeyboardInterrupt
        # and SystemExit propagate instead of being silently swallowed.
        # NOTE(review): the saved (parser, source) pair stays on the
        # stack in this path, as before -- confirm that is intended.
        return 0  # FIXME: save error info here?

    # Entity parsed successfully: switch back to the enclosing parser.
    self._parser, self._source = self._entity_stack.pop()
    return 1
40016f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
def skipped_entity_handler(self, name, is_pe):
    """Report a skipped entity to the content handler.

    When ``is_pe`` is true the entity is a parameter entity; the SAX
    spec requires those to be reported with a leading '%'.
    """
    reported = '%' + name if is_pe else name
    self._cont_handler.skippedEntity(reported)
4063f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis
40745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ---
40816f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
def create_parser(*args, **kwargs):
    """Build an ExpatParser, forwarding all arguments to its constructor."""
    parser = ExpatParser(*args, **kwargs)
    return parser
41116f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
41245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ---
41345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
if __name__ == "__main__":
    # Demo: parse a sample document fetched by URL and echo it through
    # an XMLGenerator, using the default ErrorHandler.
    import xml.sax.saxutils

    demo_url = "http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml"

    parser = create_parser()
    output_handler = xml.sax.saxutils.XMLGenerator()
    parser.setContentHandler(output_handler)
    error_handler = xml.sax.ErrorHandler()
    parser.setErrorHandler(error_handler)
    parser.parse(demo_url)
420