"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """

import handler

from _exceptions import SAXNotSupportedException, SAXNotRecognizedException


# ===== XMLREADER =====

class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start with the default handler objects from the handler module so
        # the getters always return a usable object.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must raise a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)


class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the chunk size passed to read() by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse the document designated by source.

        The source is normalized with saxutils.prepare_input_source,
        handed to prepareParser, and its byte stream is then read in
        chunks of at most self._bufsize and passed to feed.  close is
        called once the stream is exhausted."""
        # Imported here rather than at module level, as in the original code.
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # An empty read marks end of input.  Testing truthiness (instead of
        # comparing against "") terminates for every string type, including
        # bytes objects that do not compare equal to "".
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")


# ===== LOCATOR =====

class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None


# ===== INPUTSOURCE =====

class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id=None):
        # Everything except the system identifier starts out unset (None).
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a Python 2.0 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile


# ===== ATTRIBUTESIMPL =====

class AttributesImpl:
    """Read-only, mapping-like view of an element's attributes,
    keyed by attribute name (non-namespace-aware)."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        # The mapping is stored by reference, not copied.
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type; this implementation always reports 'CDATA'."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for a qualified name (same as the plain name here)."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return name unchanged if it is a known attribute, else raise KeyError."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return name unchanged if it is a known attribute, else raise KeyError."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return the attribute names (the keys of the underlying mapping)."
        return self._attrs.keys()

    def getQNames(self):
        "Return the qualified names; identical to getNames in this class."
        return self._attrs.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return the keys of the underlying mapping."
        return self._attrs.keys()

    def has_key(self, name):
        "Return whether name is an attribute (legacy spelling of `in`)."
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of name, or alternative if it is not present."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class.

        The new instance wraps the same underlying mapping object; the
        mapping itself is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return the items of the underlying mapping."
        return self._attrs.items()

    def values(self):
        "Return the values of the underlying mapping."
        return self._attrs.values()


# ===== ATTRIBUTESNSIMPL =====

class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl, keyed by
    (ns_uri, lname) pairs with a parallel mapping to qualified names."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value of the attribute whose qualified name is name."
        # Reverse lookup: scan the (ns_uri, lname) -> qname mapping.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) key whose qualified name is name."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for a (ns_uri, lname) key."
        return self._qnames[name]

    def getQNames(self):
        "Return all qualified names."
        return self._qnames.values()

    def copy(self):
        """Return a new instance of the same class wrapping the same
        attrs and qnames mapping objects (they are not duplicated)."""
        return self.__class__(self._attrs, self._qnames)


def _test():
    # Smoke test: the interface classes must be instantiable without arguments.
    XMLReader()
    IncrementalParser()
    Locator()

if __name__ == "__main__":
    _test()