# expatreader.py revision 1aa2c0f073bdbed4fa824591d53e20bbf3d01add
"""
SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    import weakref
    _mkproxy = weakref.proxy
    del weakref, _weakref

# --- ExpatLocator

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """
    def __init__(self, parser):
        """Remember *parser* (an ExpatParser) through _mkproxy()."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None when the ExpatParser
        currently holds no expat parser."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 when the ExpatParser
        currently holds no expat parser."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the parser's current source."""
        parser = self._ref
        # NOTE(review): self._ref is whatever _mkproxy() returned for the
        # ExpatParser, so this test presumably never succeeds -- confirm
        # before relying on the None result.
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the parser's current source."""
        parser = self._ref
        # NOTE(review): same caveat as in getPublicId().
        if parser is None:
            return None
        return parser._source.getSystemId()


# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle reader.

        namespaceHandling is the initial state of the namespaces feature;
        bufsize is passed on to xmlreader.IncrementalParser.  The expat
        parser object itself is only created by reset().
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # (expat parser, InputSource) pairs saved while an external
        # entity is being parsed; see external_entity_ref().
        self._entity_stack = []
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Set the expat base to the system id of *source*, if it has one."""
        if source.getSystemId() is not None:
            base = source.getSystemId()
            # Python 2: unicode system ids are handed to expat as UTF-8
            # byte strings.
            if isinstance(base, unicode):
                base = base.encode('utf-8')
            self._parser.SetBase(base)

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install *handler*; while parsing, rebind the expat callbacks that
        point directly at content handler methods."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of the SAX feature *name*.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the SAX feature *name* to *state*.

        Raises SAXNotSupportedException while parsing or when a true state
        is requested for a feature expat cannot provide, and
        SAXNotRecognizedException for an unknown feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an interning dictionary that is already installed.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of the SAX property *name*.

        The XML string property is only available while an expat parser
        exists and only if it offers GetInputContext().  Raises
        SAXNotRecognizedException for an unknown property name.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the SAX property *name* to *value*.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for an unknown name.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass *data* to expat, starting a new document if necessary.

        An expat.error is wrapped in a SAXParseException and reported to
        the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: feed a final empty chunk and call
        endDocument(), then always drop the expat parser."""
        if self._entity_stack or self._parser is None:
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
        finally:
            self._parsing = 0
            # break cycle created by expat handlers pointing to our methods
            self._parser = None

    def _reset_cont_handler(self):
        """Point the expat callbacks that bypass this class at the current
        content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Install the lexical handler's callbacks on the expat parser, or
        clear them when no lexical handler is set."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # startDTD takes its arguments in a different order than expat
            # supplies them, hence the adapter method.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser, wire up all callbacks and clear the
        parsing state."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespace-qualified name; start_element_ns() splits on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """Forward an element start to startElement()."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """Forward an element end to endElement()."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Forward an element start to startElementNS().

        Names are split on whitespace: one part is treated as a plain
        local name, three parts as (uri, localname, prefix), anything
        else is passed on as a tuple of the parts.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Forward an element end to endElementNS(), splitting *name* the
        same way start_element_ns() does."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward a namespace declaration to startPrefixMapping()."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward the end of a namespace scope to endPrefixMapping()."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Forward a DOCTYPE start to the lexical handler's startDTD(),
        which takes the public id before the system id."""
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external general entity with a nested expat parser.

        Returns 1 when the entity was parsed, or skipped because the
        external general entities feature is off, and 0 when parsing it
        raised any exception.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the current parser and source; close() does nothing while
        # this stack is non-empty.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): the saved parser/source pair stays on
            # _entity_stack and is not restored on this path.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """Forward a skipped entity to skippedEntity()."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

# ---

def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")