145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake"""
23f1b5288e51158d60734b434631e5ca9febef916Martin v. LöwisSAX driver for the pyexpat C module.  This driver works with
3bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbelpyexpat.__version__ == '2.22'.
445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake"""
545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# Presumably the version of this SAX driver itself; it is a separate value
# from the pyexpat version named in the module docstring.
version = "0.20"
745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
8fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax._exceptions import *
9fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_validation, feature_namespaces
10fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_namespace_prefixes
11fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_external_ges, feature_external_pes
12fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import feature_string_interning
13fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax.handler import property_xml_string, property_interning_dict
14fbdeaad06910a50d6f05da177949b9a451a1132aFred Drake
# Jython needs an explicit platform check, because importing
# xml.parsers.expat there does not raise ImportError.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Refuse to load this driver unless a usable expat binding is present.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)

if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
28fbdeaad06910a50d6f05da177949b9a451a1132aFred Drakefrom xml.sax import xmlreader, saxutils, handler
2945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# Module-level shortcuts for the attribute containers passed to the
# content handler by start_element() and start_element_ns().
AttributesNSImpl = xmlreader.AttributesNSImpl
AttributesImpl = xmlreader.AttributesImpl
3232bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
# Weak references let the locator point at the parser without creating a
# cycle between the parser and the content handler.  When the _weakref
# module cannot be imported, _mkproxy() simply hands back the object itself.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    from weakref import proxy as _mkproxy
    del _weakref
45012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake
class _ClosedParser:
    """Attribute holder that ExpatParser.close() may leave in place of expat.

    close() copies ErrorColumnNumber and ErrorLineNumber onto an instance so
    that the position stays available after closing.
    """
483234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka
49012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake# --- ExpatLocator
50012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake
class ExpatLocator(xmlreader.Locator):
    """Document locator that ExpatParser hands to the content handler.

    Only the object returned by _mkproxy() is stored, so that the locator
    does not add a circular reference between the parser and the content
    handler.
    """

    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's column number, or None when no parser exists."""
        reader = self._ref
        if reader._parser is not None:
            return reader._parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's line number, or 1 when no parser exists."""
        reader = self._ref
        if reader._parser is not None:
            return reader._parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public identifier of the reader's input source."""
        reader = self._ref
        if reader is not None:
            return reader._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system identifier of the reader's input source."""
        reader = self._ref
        if reader is not None:
            return reader._source.getSystemId()
        return None
83012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake
843f0969f100a565a239f3504b50ab8e31d6e81b14Martin v. Löwis
8545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# --- ExpatParser
8645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
87ddb486745bbcb912eee2e84791273fa0a8e3c9e2Fred Drakeclass ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
883f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis    """SAX driver for the pyexpat C module."""
8945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
9045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
9145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        xmlreader.IncrementalParser.__init__(self, bufsize)
92e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._source = xmlreader.InputSource()
9345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser = None
9445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._namespaces = namespaceHandling
950591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        self._lex_handler_prop = None
9645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parsing = 0
97e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._entity_stack = []
9818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        self._external_ges = 1
9918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        self._interning = None
10045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
10145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # XMLReader methods
10245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
103523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel    def parse(self, source):
104bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbel        "Parse an XML document from a URL or an InputSource."
105523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel        source = saxutils.prepare_input_source(source)
106523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
107523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel        self._source = source
10845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self.reset()
109012c81fc9720c8504da73b26f503b0ef8640da19Fred Drake        self._cont_handler.setDocumentLocator(ExpatLocator(self))
11016f6329e6153c4b92f2175a5560e372a762befe6Fred Drake        xmlreader.IncrementalParser.parse(self, source)
11145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
112523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel    def prepareParser(self, source):
1135b63acd31e0e40c1a9a9e9762905b0054ff37994Benjamin Peterson        if source.getSystemId() is not None:
1148673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka            base = source.getSystemId()
1158673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka            if isinstance(base, unicode):
1168673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka                base = base.encode('utf-8')
1178673ab97cc1930f5f2c5d96667386e09d22d60ecSerhiy Storchaka            self._parser.SetBase(base)
11816f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
1193f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis    # Redefined setContentHandler to allow changing handlers during parsing
120fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
121fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis    def setContentHandler(self, handler):
122fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        xmlreader.IncrementalParser.setContentHandler(self, handler)
123fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        if self._parsing:
124fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis            self._reset_cont_handler()
125fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
12645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getFeature(self, name):
12718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        if name == feature_namespaces:
128f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            return self._namespaces
12918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_string_interning:
13018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return self._interning is not None
13118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name in (feature_validation, feature_external_pes,
13218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                      feature_namespace_prefixes):
13318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return 0
13418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_external_ges:
13518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return self._external_ges
13645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
13745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
13845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def setFeature(self, name, state):
139f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        if self._parsing:
140f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            raise SAXNotSupportedException("Cannot set features while parsing")
14118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis
14218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        if name == feature_namespaces:
143f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            self._namespaces = state
14418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_external_ges:
14518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            self._external_ges = state
14618476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_string_interning:
14718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
14818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                if self._interning is None:
14918476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                    self._interning = {}
15018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            else:
15118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                self._interning = None
15218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_validation:
15318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
1543f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1553f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "expat does not support validation")
15618476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_external_pes:
15718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
1583f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1593f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "expat does not read external parameter entities")
16018476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == feature_namespace_prefixes:
16118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if state:
1623f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1633f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "expat does not report namespace prefixes")
164f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        else:
1653f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            raise SAXNotRecognizedException(
1663f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                "Feature '%s' not recognized" % name)
16745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
16845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getProperty(self, name):
1690591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if name == handler.property_lexical_handler:
1700591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            return self._lex_handler_prop
17118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_interning_dict:
17218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return self._interning
17318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_xml_string:
17418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            if self._parser:
17518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                if hasattr(self._parser, "GetInputContext"):
17618476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                    return self._parser.GetInputContext()
17718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                else:
1783f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    raise SAXNotRecognizedException(
1793f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                        "This version of expat does not support getting"
1803f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                        " the XML string")
18118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            else:
1823f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                raise SAXNotSupportedException(
1833f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                    "XML string cannot be returned when not parsing")
18445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
18545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
18645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def setProperty(self, name, value):
1870591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if name == handler.property_lexical_handler:
1880591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            self._lex_handler_prop = value
189fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis            if self._parsing:
190fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis                self._reset_lex_handler_prop()
19118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_interning_dict:
19218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            self._interning = value
19318476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        elif name == property_xml_string:
19418476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            raise SAXNotSupportedException("Property '%s' cannot be set" %
19518476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                                           name)
1960591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        else:
19718476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            raise SAXNotRecognizedException("Property '%s' not recognized" %
19818476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis                                            name)
19945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
20045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # IncrementalParser methods
20145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
202ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis    def feed(self, data, isFinal = 0):
20345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if not self._parsing:
20445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self.reset()
20555b4efd034780a069c9bbf5b080a62df32f51441Lars Gustäbel            self._parsing = 1
20645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self._cont_handler.startDocument()
207f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel
208ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis        try:
209ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # The isFinal parameter is internal to the expat reader.
210ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # If it is set to true, expat will check validity of the entire
211ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # document. When feeding chunks, they are not normally final -
212ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # except when invoked from close.
213ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            self._parser.Parse(data, isFinal)
2143f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        except expat.error, e:
2153f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            exc = SAXParseException(expat.ErrorString(e.code), e, self)
2160591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            # FIXME: when to invoke error()?
21704f4943d132d0d5e9829923706a2cb07a2b0ae9fMartin v. Löwis            self._err_handler.fatalError(exc)
21845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
21945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def close(self):
2203234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka        if (self._entity_stack or self._parser is None or
2213234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka            isinstance(self._parser, _ClosedParser)):
222ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            # If we are completing an external entity, do nothing here
223ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis            return
2241aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka        try:
2251aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self.feed("", isFinal = 1)
2261aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self._cont_handler.endDocument()
2271aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self._parsing = 0
2281aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            # break cycle created by expat handlers pointing to our methods
2291aa2c0f073bdbed4fa824591d53e20bbf3d01addSerhiy Storchaka            self._parser = None
2303234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka        finally:
2313234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka            self._parsing = 0
2323234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka            if self._parser is not None:
2333234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
2343234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka                parser = _ClosedParser()
2353234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
2363234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka                parser.ErrorLineNumber = self._parser.ErrorLineNumber
2373234abb9a057beb88faeef96745f8c78772a88c2Serhiy Storchaka                self._parser = parser
23816f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
239fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis    def _reset_cont_handler(self):
240fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        self._parser.ProcessingInstructionHandler = \
241fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis                                    self._cont_handler.processingInstruction
242fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        self._parser.CharacterDataHandler = self._cont_handler.characters
243fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
244fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis    def _reset_lex_handler_prop(self):
2453f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        lex = self._lex_handler_prop
2463f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        parser = self._parser
2473f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        if lex is None:
2483f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.CommentHandler = None
2493f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartCdataSectionHandler = None
2503f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndCdataSectionHandler = None
2513f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartDoctypeDeclHandler = None
2523f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndDoctypeDeclHandler = None
2533f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        else:
2543f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.CommentHandler = lex.comment
2553f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartCdataSectionHandler = lex.startCDATA
2563f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndCdataSectionHandler = lex.endCDATA
2573f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.StartDoctypeDeclHandler = self.start_doctype_decl
2583f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parser.EndDoctypeDeclHandler = lex.endDTD
259fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis
26045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def reset(self):
26145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if self._namespaces:
262593d6b311e03b745e7b736f3d72269a684359924Andrew M. Kuchling            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
2633f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                                              intern=self._interning)
2643f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            self._parser.namespace_prefixes = 1
26545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self._parser.StartElementHandler = self.start_element_ns
26645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake            self._parser.EndElementHandler = self.end_element_ns
26745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        else:
268593d6b311e03b745e7b736f3d72269a684359924Andrew M. Kuchling            self._parser = expat.ParserCreate(self._source.getEncoding(),
269593d6b311e03b745e7b736f3d72269a684359924Andrew M. Kuchling                                              intern = self._interning)
2706c4753f925467e5908a43d2fec6d15b76e878d42Paul Prescod            self._parser.StartElementHandler = self.start_element
2716c4753f925467e5908a43d2fec6d15b76e878d42Paul Prescod            self._parser.EndElementHandler = self.end_element
27245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
273fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis        self._reset_cont_handler()
27445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
27545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.NotationDeclHandler = self.notation_decl
27645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
27745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
27870d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis
2790591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        self._decl_handler_prop = None
2800591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if self._lex_handler_prop:
281fb73bb129b2ccbd9644709ac8eeac1d5e7f0a32dMartin v. Löwis            self._reset_lex_handler_prop()
28270d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis#         self._parser.DefaultHandler =
28370d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis#         self._parser.DefaultHandlerExpand =
28470d39a60a80e2fcf21b05e899d43f6dab49f839eMartin v. Löwis#         self._parser.NotStandaloneHandler =
28545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._parser.ExternalEntityRefHandler = self.external_entity_ref
2863f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        try:
2873f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            self._parser.SkippedEntityHandler = self.skipped_entity_handler
2883f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        except AttributeError:
2893f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # This pyexpat does not support SkippedEntity
2903f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            pass
2913f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        self._parser.SetParamEntityParsing(
2923f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
293bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbel
29455b4efd034780a069c9bbf5b080a62df32f51441Lars Gustäbel        self._parsing = 0
295bb757136b29369e88c72e1563ee95cd6514c15a0Lars Gustäbel        self._entity_stack = []
29616f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
29745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # Locator methods
29845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
29945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getColumnNumber(self):
3000591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if self._parser is None:
3010591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            return None
30245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return self._parser.ErrorColumnNumber
30345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
30445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getLineNumber(self):
3050591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        if self._parser is None:
3060591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            return 1
30745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return self._parser.ErrorLineNumber
30845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
30945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getPublicId(self):
31045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return self._source.getPublicId()
31145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
31245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def getSystemId(self):
313ee1dc157d7f425d8fdd12de098097441b4f17798Martin v. Löwis        return self._source.getSystemId()
31416f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
31545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    # event handlers
31645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def start_element(self, name, attrs):
31732bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel        self._cont_handler.startElement(name, AttributesImpl(attrs))
31845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
31945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def end_element(self, name):
320f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        self._cont_handler.endElement(name)
32145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
32245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def start_element_ns(self, name, attrs):
323ab199622905b2621b2ad9abcb324fb5f124cc12fNeal Norwitz        pair = name.split()
32445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if len(pair) == 1:
3253f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # no namespace
326f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel            pair = (None, name)
3273f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        elif len(pair) == 3:
3283f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            pair = pair[0], pair[1]
329d2f5a9ac4b161018945cdb5e5a26a722ae86cdb9Lars Gustäbel        else:
3303f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # default namespace
331d2f5a9ac4b161018945cdb5e5a26a722ae86cdb9Lars Gustäbel            pair = tuple(pair)
33245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
33332bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel        newattrs = {}
3343f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        qnames = {}
33532bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel        for (aname, value) in attrs.items():
3363f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            parts = aname.split()
3373f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            length = len(parts)
3383f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            if length == 1:
3393f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                # no namespace
3403f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                qname = aname
34132bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel                apair = (None, aname)
3423f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            elif length == 3:
3433f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                qname = "%s:%s" % (parts[2], parts[1])
3443f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                apair = parts[0], parts[1]
34532bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel            else:
3463f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                # default namespace
3473f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                qname = parts[1]
3483f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                apair = tuple(parts)
34932bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
35032bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel            newattrs[apair] = value
3513f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            qnames[apair] = qname
35232bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
35316f6329e6153c4b92f2175a5560e372a762befe6Fred Drake        self._cont_handler.startElementNS(pair, None,
3543f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis                                          AttributesNSImpl(newattrs, qnames))
35545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
35645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def end_element_ns(self, name):
357ab199622905b2621b2ad9abcb324fb5f124cc12fNeal Norwitz        pair = name.split()
35845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        if len(pair) == 1:
35932bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel            pair = (None, name)
3603f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        elif len(pair) == 3:
3613f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            pair = pair[0], pair[1]
3620591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis        else:
3630591725bc5947c7b604b6d4bc59b0fc7e45d8070Martin v. Löwis            pair = tuple(pair)
36416f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
365f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel        self._cont_handler.endElementNS(pair, None)
36645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
367f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel    # this is not used (call directly to ContentHandler)
36845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def processing_instruction(self, target, data):
36945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._cont_handler.processingInstruction(target, data)
37045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
371f43cf31f4a60091af8b2146f4589be53a6d76b8cLars Gustäbel    # this is not used (call directly to ContentHandler)
37245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def character_data(self, data):
37345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._cont_handler.characters(data)
37445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
37545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def start_namespace_decl(self, prefix, uri):
37645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._cont_handler.startPrefixMapping(prefix, uri)
37745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
37845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def end_namespace_decl(self, prefix):
37945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._cont_handler.endPrefixMapping(prefix)
38016f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
381456ab1d2712dc9cebd878966c8fb16af47ea79f0Martin v. Löwis    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
3823f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        self._lex_handler_prop.startDTD(name, pubid, sysid)
3833f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis
38445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
38545cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
38645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
38745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def notation_decl(self, name, base, sysid, pubid):
38845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        self._dtd_handler.notationDecl(name, pubid, sysid)
38945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
39045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    def external_entity_ref(self, context, base, sysid, pubid):
39118476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis        if not self._external_ges:
39218476a3740b66cea8ee1dffa820c432a389ba23aMartin v. Löwis            return 1
393d1b516c274aa1502514d7b3c51f63894480560e1Martin v. Löwis
39445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        source = self._ent_handler.resolveEntity(pubid, sysid)
395e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        source = saxutils.prepare_input_source(source,
396e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel                                               self._source.getSystemId() or
397e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel                                               "")
39816f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
399e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._entity_stack.append((self._parser, self._source))
400e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._parser = self._parser.ExternalEntityParserCreate(context)
401e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        self._source = source
402e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel
403e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        try:
404e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel            xmlreader.IncrementalParser.parse(self, source)
405e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        except:
406e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel            return 0  # FIXME: save error info here?
407e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel
408e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        (self._parser, self._source) = self._entity_stack[-1]
409e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1bLars Gustäbel        del self._entity_stack[-1]
41045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake        return 1
41116f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
4123f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis    def skipped_entity_handler(self, name, is_pe):
4133f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        if is_pe:
4143f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            # The SAX spec requires to report skipped PEs with a '%'
4153f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis            name = '%'+name
4163f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis        self._cont_handler.skippedEntity(name)
4173f1b5288e51158d60734b434631e5ca9febef916Martin v. Löwis
41845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ---
41916f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    parser = ExpatParser(*args, **kwargs)
    return parser
42216f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
42345cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ---
42445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
if __name__ == "__main__":
    # Demo: parse a sample document and echo it through an XMLGenerator.
    import xml.sax.saxutils
    demo_parser = create_parser()
    generator = xml.sax.saxutils.XMLGenerator()
    demo_parser.setContentHandler(generator)
    error_handler = xml.sax.ErrorHandler()
    demo_parser.setErrorHandler(error_handler)
    demo_parser.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")
431