xmlreader.py revision 491ded78ccd2b1fff10090e4d11ef42a9732bfdd
"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
307cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
407cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drakeimport handler
507cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
# ===== XMLREADER =====
745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event.

    This base class only stores the four handler objects. parse() and
    the feature/property accessors are placeholders that always raise,
    so concrete drivers are expected to override them."""

    def __init__(self):
        # Start with the default handler objects from the handler module so
        # the getters never return None before the application registers
        # its own handlers.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        The base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        # NOTE: the parameter name shadows the module-level ``handler``
        # import inside this method only; it is part of the public
        # signature and therefore left unchanged.
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        The base implementation supports no locale and always raises
        SAXNotSupportedException."""
        # Function-scope import: the exception class is not imported at the
        # visible top of this module, so a bare reference raised NameError
        # instead of the intended SAX exception.
        from xml.sax._exceptions import SAXNotSupportedException
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        The base implementation recognizes no features and always raises
        SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        The base implementation recognizes no features and always raises
        SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        The base implementation recognizes no properties and always raises
        SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        The base implementation recognizes no properties and always raises
        SAXNotRecognizedException."""
        from xml.sax._exceptions import SAXNotRecognizedException
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
8745cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the maximum amount requested from the input stream per
        # read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by pumping it through feed().

        source is normalized with saxutils.prepare_input_source, handed to
        prepareParser(), and then read in chunks of at most self._bufsize.
        Each non-empty chunk is passed to feed(); once the stream is
        exhausted close() is called so the driver can run its final checks.

        The character stream of the input source is used when one is set,
        otherwise the byte stream is used."""
        # Function-scope import, as in the original; spelled with the
        # package name so it also resolves without implicit relative imports.
        from xml.sax import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)

        # A character stream, when present, takes precedence over the byte
        # stream (this mirrors the contract documented on InputSource).
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()

        # Truthiness ends the loop for both "" and b""; comparing against ""
        # alone never terminates when the stream yields bytes.
        buffer = stream.read(self._bufsize)
        while buffer:
            self.feed(buffer)
            buffer = stream.read(self._bufsize)

        # Signal end of input; without this the document is never finalized.
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException.

        The base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing.

        The base implementation always raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException.

        The base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined.

        The base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
15645cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# ===== LOCATOR =====
15832bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class Locator:
    """Interface for associating a SAX event with a document
    location.

    Results are only meaningful while a handler callback is being
    executed; outside of that window they are unpredictable. This
    base class knows no location at all, so every accessor returns
    its "unknown" value (-1 for positions, None for identifiers)."""

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        The base implementation always returns -1."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends.

        The base implementation always returns -1."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        The base implementation always returns None."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        The base implementation always returns None."""
        return None
18045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# ===== INPUTSOURCE =====
182523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
class InputSource:
    """Bundle of everything an XMLReader needs in order to read an
    entity.

    An instance can carry a public identifier, a system identifier,
    a byte stream (optionally accompanied by the name of its character
    encoding) and/or a character stream.

    Applications build these objects to pass to XMLReader.parse and
    to return from EntityResolver.resolveEntity.

    The object is owned by the application: an XMLReader must not
    modify an InputSource it was handed, though it may copy it and
    change the copy."""

    def __init__(self, system_id=None):
        # Everything except the system identifier starts out unset.
        self.__public_id = None
        self.__system_id = system_id
        self.__bytefile = None
        self.__charfile = None
        self.__encoding = None

    # --- identifiers -------------------------------------------------

    def setPublicId(self, public_id):
        "Store the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Return the stored public identifier (None if never set)."
        return self.__public_id

    def setSystemId(self, system_id):
        "Store the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Return the stored system identifier (None if never set)."
        return self.__system_id

    # --- encoding ----------------------------------------------------

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The value should be a string that would be legal in an XML
        encoding declaration (section 4.3.3 of the XML recommendation).
        No validation is performed here.

        The encoding is meant to be ignored by readers when the
        InputSource also carries a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Return the stored character encoding (None if never set)."
        return self.__encoding

    # --- streams -----------------------------------------------------

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source: a file-like
        object that does no byte-to-character conversion.

        A SAX parser is expected to ignore the byte stream when a
        character stream is also present, but to prefer it over opening
        a URI connection on its own.

        When the application knows the encoding of the bytes it should
        record it through setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the stored byte stream (None if never set).

        getEncoding reports the character encoding recorded for this
        stream, or None when it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source: a
        file-like object whose reads produce Unicode strings.

        When a character stream is present, a SAX parser is expected to
        ignore any byte stream and not to open a URI connection to the
        system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Return the stored character stream (None if never set)."
        return self.__charfile
269523b0a6ec87ac7f84de8a004e3c33581eb2a542fLars Gustäbel
# ===== ATTRIBUTESIMPL =====
27132bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class AttributesImpl:
    """Dictionary-backed attribute collection without namespace support.

    Because there are no namespaces, an attribute's name and its
    qualified name are the same string, so the *ByQName / *ByName
    lookups simply validate the key and hand it back."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}. The mapping is stored
        by reference, not copied."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type; always 'CDATA', name is not checked."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name name; KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for a qualified name; raises KeyError if absent."
        # ``in`` replaces dict.has_key, which modern dicts do not provide.
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for a name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        # list() gives a snapshot list whether keys() is a list or a view.
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, ``name in attrs`` falls back to calling
        # __getitem__ with 0, 1, ... and fails with KeyError(0).
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if an attribute called name exists."
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class.

        The new object wraps the same underlying mapping; the mapping
        itself is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
33145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
# ===== ATTRIBUTESNSIMPL =====
33332bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Values are keyed by (ns_uri, lname) tuples; a second mapping with
    the same keys records each attribute's qualified name. Methods not
    overridden here are inherited from AttributesImpl."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.

        Both mappings are stored by reference, not copied."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Raises KeyError(name) when no attribute has that qualified name."""
        # The qname map is keyed by (ns_uri, lname), so a reverse lookup
        # has to scan it.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is name.

        Raises KeyError(name) when no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name recorded for the (ns_uri, lname) key.

        Raises KeyError when the key is not in the qname mapping."""
        return self._qnames[name]

    def getQNames(self):
        "Return a list of all qualified attribute names."
        # list() gives a snapshot list whether values() is a list or a view.
        return list(self._qnames.values())

    def copy(self):
        """Return a new instance of the same class.

        The new object wraps the same two underlying mappings; they are
        not duplicated."""
        return self.__class__(self._attrs, self._qnames)
36632bf12eb8a5849762721b561f9b48c6e897792e9Lars Gustäbel
36707cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drake
36845cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drakedef _test():
36945cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    XMLReader()
37045cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    IncrementalParser()
37145cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    Locator()
37245cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake
37307cbc4e5bd7fdc2f5e92ac65dbabf35bde002befFred Drakeif __name__ == "__main__":
37445cd9de2bb2faa96bb18eb11d20261d7d1b8c20eFred Drake    _test()
375