# Derived from xmlreader.py revision 31b485ffb0572fb1e71ee7ab6fb4a641a4710870
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """

from xml.sax import handler
# These two exceptions are raised by the default XMLReader methods below;
# without this import those methods fail with NameError instead.
from xml.sax._exceptions import SAXNotSupportedException
from xml.sax._exceptions import SAXNotRecognizedException


# ===== XMLREADER =====

class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start with the default handler objects from the handler module
        # so that every getter returns a usable object.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This default implementation always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This default implementation always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This default implementation always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This default implementation always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This default implementation always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)


class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by feeding it to the parser in chunks.

        source is first passed through saxutils.prepare_input_source;
        prepareParser is then called with the result, the stream is read
        bufsize units at a time with each chunk handed to feed, and
        finally close is called."""
        # Imported here rather than at module level, as the original did.
        from xml.sax import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        # The byte stream is tried first (the historical behaviour); the
        # character stream is used only when no byte stream is available,
        # instead of failing on None.
        stream = source.getByteStream()
        if stream is None:
            stream = source.getCharacterStream()
        chunk = stream.read(self._bufsize)
        # Truthiness rather than comparison with "" so that an empty bytes
        # object at end of input also terminates the loop.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")


# ===== LOCATOR =====

class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None


# ===== INPUTSOURCE =====

class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id = None):
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a Python 1.6 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile


# ===== ATTRIBUTESIMPL =====

class AttributesImpl:
    """Read-only, mapping-like view of an element's attributes in which
    the name and the qualified name of an attribute are the same key."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the type of the attribute; this is always 'CDATA'."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for a qualified name; raises KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for a qualified name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for a name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Lets "name in attrs" work directly instead of falling back to
        # integer indexing through __getitem__.
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if an attribute called name is present."
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class.

        The new object wraps the same underlying mapping; the mapping
        itself is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())


# ===== ATTRIBUTESNSIMPL =====

class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl in which names are
    (ns_uri, lname) pairs and qualified names are kept separately."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value for a qualified name; raises KeyError if absent."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair for a qualified name; raises
        KeyError if absent."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for a (ns_uri, lname) pair; raises
        KeyError if absent."""
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class.

        The new object wraps the same underlying mappings; they are not
        duplicated."""
        return self.__class__(self._attrs, self._qnames)


def _test():
    "Smoke test: check that the interface classes can be instantiated."
    XMLReader()
    IncrementalParser()
    Locator()

if __name__ == "__main__":
    _test()