# saxutils.py revision 95b4ec5fbe039f1b6877d74fffc6564a42efe1cb
"""\
A library of useful helper classes for use with the SAX classes, for the
convenience of application and driver writers.
"""
5
6import os, urlparse, urllib, types
7import handler
8import xmlreader
9
# Types that prepare_input_source accepts as "a string naming the source".
# types.UnicodeType is looked up defensively because it may be absent from
# the types module; in that case only the plain string type is listed.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
14
15
def escape(data, entities=None):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The extra replacements are applied after the three built-in ones, in
    the dictionary's iteration order.  The escaped string is returned.
    """
    # "&" must be handled first: otherwise the ampersands introduced by
    # "&lt;" and "&gt;" would be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # None means "no extra entities"; using it as the default avoids
    # sharing one mutable dictionary object between all calls.
    if entities:
        for chars, entity in entities.items():
            data = data.replace(chars, entity)
    return data
29
30
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is an object with a write() method; when it is omitted,
    sys.stdout is used.  encoding is the name placed in the XML
    declaration written by startDocument(); the strings handed to
    out.write() are not encoded by this class.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that still
        # have to be written as xmlns attributes on the next start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Return the tag/attribute name to write for a (uri, localname) pair.

        Raises KeyError when the URI has no prefix mapping in effect.
        """
        uri, localname = name
        if uri:
            # The "xml" prefix is bound to this URI by definition, so it
            # is never announced through startPrefixMapping.
            if uri == "http://www.w3.org/XML/1998/namespace":
                return "xml:" + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        # No namespace, or the default namespace (mapped without a
        # prefix): the name is written unqualified.
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a copy of the mappings in effect so that endPrefixMapping
        # can restore them, then record the new mapping.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            # Values are written inside double quotes, so a literal '"'
            # has to be escaped as well as &, < and >.
            self._out.write(' %s="%s"' %
                            (attr_name, escape(value, {'"': "&quot;"})))
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # A mapping without a prefix declares the default namespace.
                self._out.write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            # attr_name is a (uri, localname) pair; attributes without a
            # namespace have no URI and are written unqualified.
            self._out.write(' %s="%s"' % (self._qname(attr_name),
                                          escape(value, {'"': "&quot;"})))
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
101
102
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through.

    parent is the reader the filter wraps; it can also be supplied
    later with setParent(), but must be set before parse() is called."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware method, keeping qname, so the
        # handler sees the same event that endElementNS will later close.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer is the result of this call; it has to be
        # handed back to the parent reader that asked for it.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register the filter as every kind of handler on the parent so
        # that all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
211
212# --- Utility functions
213
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is passed to the InputSource
    constructor, or an object with a read() method, which becomes the
    byte stream of a new InputSource (its name attribute, if any, is
    used as the system id).

    If the InputSource has no byte stream yet, its system id is
    resolved against base: when that names an existing local file the
    file is opened, otherwise the id is treated as a URL and opened with
    urllib.  The opened stream is stored on the InputSource and is left
    open for the caller to read."""

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Resolve the id against the directory part of base first, and
        # use that one path for the existence check, the recorded system
        # id and the open() call, so that all three agree.
        basehead = os.path.split(os.path.normpath(base))[0]
        filename = os.path.join(basehead, sysid)
        if os.path.isfile(filename):
            source.setSystemId(filename)
            stream = open(filename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            stream = urllib.urlopen(source.getSystemId())

        source.setByteStream(stream)

    return source
240