"""
SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    # An importable module without ParserCreate is treated as unusable too.
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        """Fallback used when weak references are unavailable: return o."""
        return o
else:
    import weakref
    _mkproxy = weakref.proxy
    del weakref, _weakref


class _ClosedParser:
    """Placeholder stored in ExpatParser._parser after close().

    close() copies ErrorColumnNumber and ErrorLineNumber onto an instance
    so the locator methods keep working once the real parser is dropped.
    """
    pass


# --- ExpatLocator

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """
    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber, or None if no parser."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber, or 1 if no parser."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the source currently being parsed."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the source currently being parsed."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getSystemId()


# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a reader.

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed through to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Hand the source's system id (if any) to expat as the base."""
        if source.getSystemId() is not None:
            base = source.getSystemId()
            # Python 2: unicode system ids are passed as UTF-8 byte strings.
            if isinstance(base, unicode):
                base = base.encode('utf-8')
            self._parser.SetBase(base)

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install handler; re-wire the expat callbacks if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature name.

        Raises SAXNotRecognizedException for an unknown feature.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature name to state.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable validation, external parameter entities or namespace
        prefixes; raises SAXNotRecognizedException for an unknown feature.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dictionary if there is one.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property name.

        The XML string property is only available while a parser exists
        and only if that parser offers GetInputContext.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property name to value.

        The lexical handler may be replaced during parsing; the XML string
        property is read-only.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Feed a chunk of data to expat.

        The first call after a reset creates the parser and reports
        startDocument().  An expat.error is wrapped in a SAXParseException
        and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document and release the expat parser.

        Does nothing while an external entity is being processed, when no
        parser exists, or when the reader was already closed.
        """
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal=1)
            self._cont_handler.endDocument()
            self._parsing = 0
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser

    def _reset_cont_handler(self):
        """Point expat's PI and character callbacks at the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Wire (or clear) the expat callbacks used by the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through a method because the argument order differs.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and install all callbacks on it."""
        if self._namespaces:
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber, or None if no parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber, or 1 if no parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Translate an expat start tag into startElementNS().

        Names arrive as space-separated fields: one field means no
        namespace, two mean (uri, localname), three carry a prefix as the
        last field.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Translate an expat end tag into endElementNS()."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # expat passes sysid before pubid; startDTD is called with pubid first.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a sub-parser.

        Returns 1 on success (or when external general entities are
        disabled) and 0 if parsing the entity raised anything.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the current parser/source so they can be restored afterwards.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): bare except kept from the original; on this path
            # the entity stack is not popped, so the reader keeps pointing at
            # the entity's parser and source.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

# ---

def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")