"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
307cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
407cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drakeimport handler
52c071955746c0ce296629d7b9194e2af14f5ce16Martin v. Löwis
62c071955746c0ce296629d7b9194e2af14f5ce16Martin v. Löwisfrom _exceptions import SAXNotSupportedException, SAXNotRecognizedException
72c071955746c0ce296629d7b9194e2af14f5ce16Martin v. Löwis
8904f2fcbd78936baa1fb94fa8f0f2119da8ed44cFred Drake
945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ===== XMLREADER =====
1045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
class XMLReader:
    """Callback-based interface for reading an XML document.

    A SAX2 driver for an XML parser must implement this interface. It
    lets an application set and query parser features and properties,
    register the event handlers used during document processing, and
    start a parse.

    Every SAX interface is assumed to be synchronous: a parse method
    must not return before parsing is complete, and a reader must wait
    for an event-handler callback to return before it reports the next
    event."""

    def __init__(self):
        # Every handler slot starts out holding the default object
        # provided by the handler module.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the ContentHandler that is currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register the object that will receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler that is currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register the object that will receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver that is currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register the object that will resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler that is currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register the object that will receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers do not have to localize errors and warnings, but a
        parser that cannot support the requested locale must raise a
        SAX exception. An application may ask for a locale change in
        the middle of a parse.

        This base implementation always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
9045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Create the parser.

        bufsize is the maximum amount of data requested from the
        input stream per read call made by parse."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by driving the incremental interface.

        source is passed through saxutils.prepare_input_source, the
        result is handed to prepareParser, and its byte stream is then
        read in pieces of at most self._bufsize. Each piece is given
        to feed, and close is called once the stream is exhausted."""
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Stop on any empty read result. Comparing against "" would
        # never match an empty bytes object, so a stream returning
        # bytes could keep the loop running forever.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
16045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
16145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ===== LOCATOR =====
16232bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class Locator:
    """Interface that ties a SAX event to a location in the document.

    The values returned by a locator are valid only during calls to
    DocumentHandler methods; at any other time the results are
    unpredictable."""

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        This default implementation always returns -1."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends.

        This default implementation always returns -1."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        This default implementation always returns None."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        This default implementation always returns None."""
        return None
18445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
185523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel# ===== INPUTSOURCE =====
186523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry the public identifier, the system
    identifier, a byte stream (optionally with character encoding
    information) and/or a character stream of an entity.

    Applications create objects of this class to pass to the
    XMLReader.parse method and to return from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id=None):
        # Only the system identifier can be supplied at construction
        # time; every other field starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Store the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Store the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute is ignored if the InputSource also
        contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source.

        The stream is a Python file-like object which does not perform
        byte-to-character conversion.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source.

        The stream must be a Python 2.0 Unicode-wrapped file-like
        object that performs conversion to Unicode strings.

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
27316f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
27432bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel# ===== ATTRIBUTESIMPL =====
27532bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class AttributesImpl:
    """Attribute collection for elements parsed without namespace support.

    The attributes live in a single mapping of the form {name: value}.
    Without namespaces, plain names and qualified names are the same
    strings, so the QName-based accessors delegate to the same
    mapping."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here."""
        return "CDATA"

    def getValue(self, name):
        """Return the value stored for name; KeyError if it is absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value stored for the qualified name name.

        Raises KeyError if the attribute is absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return name unchanged after checking that the attribute exists.

        Raises KeyError if the attribute is absent."""
        if name not in self._attrs:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return name unchanged after checking that the attribute exists.

        Raises KeyError if the attribute is absent."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified attribute names."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def has_key(self, name):
        """Return True if an attribute called name exists."""
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value for name, or alternative if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class.

        The new object is built from the same underlying mapping
        object, which is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
33845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
33932bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel# ===== ATTRIBUTESNSIMPL =====
34032bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class AttributesNSImpl(AttributesImpl):
    """Attribute collection for elements parsed with namespace support.

    Values are keyed by (ns_uri, lname) tuples, and a second mapping
    with the same keys holds each attribute's qualified name."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Raises KeyError if no attribute has that qualified name."""
        # _qnames maps (ns_uri, lname) -> qname, so the reverse lookup
        # has to scan the entries.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        # Call form of raise: valid on both Python 2 and Python 3.
        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is name.

        Raises KeyError if no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name stored for the (ns_uri, lname) key name.

        Raises KeyError if the key is absent."""
        return self._qnames[name]

    def getQNames(self):
        """Return a list of the qualified attribute names."""
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class.

        The new object is built from the same two underlying mapping
        objects, which are not duplicated."""
        return self.__class__(self._attrs, self._qnames)
37316f6329e6153c4b92f2175a5560e372a762befe6Fred Drake
37407cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
def _test():
    """Smoke test: instantiate each class that needs no arguments."""
    for cls in (XMLReader, IncrementalParser, Locator):
        cls()
37945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# Run the smoke test when this module is executed as a script.
if __name__ == "__main__":
    _test()
382