xmlreader.py revision 31b485ffb0572fb1e71ee7ab6fb4a641a4710870
"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
307cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
407cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drakeimport handler
507cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ===== XMLREADER =====
745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event.

    This base class only stores the four handler objects. parse() is
    left for subclasses to implement, and every feature, property and
    locale request is rejected with a SAX exception."""

    def __init__(self):
        # Begin with freshly created default handler objects from the
        # handler module; applications replace them via the setters.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation supports no locale and always raises
        SAXNotSupportedException."""
        # The module-level imports do not bring the SAX exception
        # classes into scope, so import the one needed right here.
        from xml.sax._exceptions import SAXNotSupportedException
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no feature and always
        raises SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no feature and always
        raises SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no property and always
        raises SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no property and always
        raises SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
8745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by feeding it to the parser in chunks.

        source is handed to saxutils.prepare_input_source, then
        prepareParser is called with the result. The byte stream of
        the prepared source is read in pieces of at most bufsize and
        each piece is passed to feed; close is called once a read
        returns nothing."""
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Test truthiness rather than comparing with "": an exhausted
        # stream may return an empty byte string, which is not equal to
        # "" on every Python version and would make the loop spin forever.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
15745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
15845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake# ===== LOCATOR =====
15932bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class Locator:
    """Interface for tying a SAX event to a position in the document.

    A locator object is only expected to give valid answers while a
    DocumentHandler method is being called; results obtained at any
    other time are unpredictable.

    This base class knows no position: the numeric getters return -1
    and the identifier getters return None."""

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        The base implementation always returns -1."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends.

        The base implementation always returns -1."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        The base implementation always returns None."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        The base implementation always returns None."""
        return None
18145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
182523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel# ===== INPUTSOURCE =====
183523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry a public identifier, a system identifier, a
    byte stream (optionally together with character encoding
    information) and/or a character stream for the entity.

    Applications create objects of this class to pass to the
    XMLReader.parse method and to return from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects handed to it by the
    application, although it may make copies and modify those."""

    def __init__(self, system_id = None):
        # Only the system identifier can be given at construction time;
        # everything else starts unset and is filled in via the setters.
        self.__charfile  = None
        self.__bytefile  = None
        self.__encoding  = None
        self.__public_id = None
        self.__system_id = system_id

    # --- identifiers ---

    def setPublicId(self, public_id):
        "Store the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Return the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Store the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Return the system identifier of this InputSource."
        return self.__system_id

    # --- encoding ---

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        When the InputSource also holds a character stream, the
        encoding attribute is ignored."""
        self.__encoding = encoding

    def getEncoding(self):
        "Return the character encoding of this InputSource."
        return self.__encoding

    # --- streams ---

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source.

        The stream is a Python file-like object that does not perform
        byte-to-character conversion.

        The SAX parser ignores the byte stream when a character stream
        is also specified, but prefers it over opening a URI
        connection itself.

        An application that knows the character encoding of the byte
        stream should announce it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method reports the character encoding for
        this byte stream, or None if it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source.

        The stream must be a Python 1.6 Unicode-wrapped file-like
        that performs conversion to Unicode strings.

        When a character stream is specified, the SAX parser ignores
        any byte stream and does not attempt to open a URI connection
        to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Return the character stream for this input source."
        return self.__charfile
270523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
27132bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel# ===== ATTRIBUTESIMPL =====
27232bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class AttributesImpl:
    """Attribute collection for parsing without namespace support.

    The object wraps a mapping of attribute name to value and offers
    both the SAX accessor methods and a read-only, dictionary-like
    interface. Plain names and qualified names are the same thing
    here, so the *ByQName and *ByName lookups simply echo the name
    after checking that it exists."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always reported as 'CDATA'."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for a qualified name; raises KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for a qualified name; raises KeyError if absent."
        # Membership test via "in": dict.has_key is gone on Python 3.
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for a name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        # list() keeps the return type a list where keys() yields a view.
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return a true value if an attribute called name exists."
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class wrapping the same
        attrs mapping (the mapping itself is not duplicated)."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
33245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
33332bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel# ===== ATTRIBUTESNSIMPL =====
33432bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attribute values are keyed by (ns_uri, lname) pairs, and a second
    mapping records the qualified name that belongs to each pair."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value for qualified name; raises KeyError if absent."
        # The qnames mapping goes from (ns_uri, lname) to qname, so a
        # reverse lookup has to scan its entries.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        # Name the missing key so the error is informative.
        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) pair for qualified name; KeyError if absent."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for a (ns_uri, lname) pair."
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified attribute names."
        # list() keeps the return type a list where values() yields a view.
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class wrapping the same
        attrs and qnames mappings (the mappings are not duplicated)."""
        return self.__class__(self._attrs, self._qnames)
36732bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
36807cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
def _test():
    "Smoke test: create one instance of each class that needs no arguments."
    for factory in (XMLReader, IncrementalParser, Locator):
        factory()

if __name__ == "__main__":
    # Run the smoke test when this file is executed as a script.
    _test()
376