xmlreader.py revision 491ded78ccd2b1fff10090e4d11ef42a9732bfdd
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """

try:
    # Sibling modules of the SAX package (implicit relative import).
    import handler
    from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
except ImportError:
    # Implicit relative imports are unavailable (e.g. Python 3): reach the
    # same modules through the package path instead.
    from xml.sax import handler
    from xml.sax._exceptions import (SAXNotSupportedException,
                                     SAXNotRecognizedException)

# ===== XMLREADER =====

class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start with the default handler objects from the handler module;
        # applications replace them through the set*Handler methods.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation supports no locale and always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed and close methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the chunk size passed to read() by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse the document identified by source in bufsize-sized chunks.

        source is handed to saxutils.prepare_input_source; the byte
        stream of the resulting input source is then read chunk by
        chunk, each chunk is passed to feed, and close is called once
        the stream is exhausted."""
        try:
            import saxutils
        except ImportError:
            # No implicit relative imports: use the package path.
            from xml.sax import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Truthiness rather than comparison with "": end of input may be
        # reported as an empty str or as empty bytes.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        # Tell the driver the document is complete so that it can run its
        # final checks, as described in the class docstring.
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")

# ===== LOCATOR =====

class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable.

    This base implementation knows no location: the numeric getters
    return -1 and the identifier getters return None."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None

# ===== INPUTSOURCE =====

class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id=None):
        # Everything except the system identifier starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a Python 1.6 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile

# ===== ATTRIBUTESIMPL =====

class AttributesImpl:
    """Read-only, mapping-like view of an element's attributes for
    parsers that are not namespace-aware. Names and qualified names
    are the same thing in this implementation."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the type of attribute name; this is always 'CDATA'."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if unknown."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if unknown."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for qualified name name; raises KeyError if unknown."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for name; raises KeyError if unknown."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, `name in attrs` would fall back to integer indexing
        # through __getitem__ and fail with KeyError.
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if there is an attribute called name."
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if unknown."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class wrapping the same
        attribute mapping (the mapping itself is not copied)."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())

# ===== ATTRIBUTESNSIMPL =====

class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl: attributes are keyed
    by (ns_uri, lname) pairs and a second mapping supplies the
    qualified name of each pair."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if unknown."
        # The qname mapping is keyed by (ns_uri, lname), so finding a
        # qualified name requires a scan over its items.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair for qualified name name;
        raises KeyError if unknown."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) pair name;
        raises KeyError if unknown."""
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class wrapping the same
        attribute and qname mappings (the mappings are not copied)."""
        return self.__class__(self._attrs, self._qnames)


def _test():
    "Smoke test: check that the interface classes can be instantiated."
    XMLReader()
    IncrementalParser()
    Locator()

if __name__ == "__main__":
    _test()