# saxutils.py revision 95b4ec5fbe039f1b6877d74fffc6564a42efe1cb
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""

import os
import types
import urllib
import urlparse

import handler
import xmlreader

try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    # Interpreter without a unicode type: only byte strings exist.
    _StringTypes = [types.StringType]

# Extra replacement applied to attribute values, which XMLGenerator always
# writes between double quotes.
_ATTR_ENTITIES = {'"': "&quot;"}

# Namespace name that the "xml" prefix is bound to by definition; documents
# never declare it, so it never shows up in a prefix mapping.
_XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"


def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    escaped string.
    """
    # "&" has to go first: the other replacements introduce ampersands
    # that must not be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    for chars, entity in entities.items():
        data = data.replace(chars, entity)
    return data


class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    out is any object with a write() method (sys.stdout when omitted).
    encoding is only written into the XML declaration emitted by
    startDocument(); this class does not transcode what it writes.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Build the qualified name to write for a (uri, localname) pair.

        Names without a namespace, and names whose namespace is mapped to
        the default (empty/None) prefix, are written as the bare local
        name.  Raises KeyError when the uri has no mapping in the current
        context.
        """
        uri, localname = name
        if uri:
            if uri == _XML_NAMESPACE:
                return "xml:" + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, including the configured encoding."""
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new uri -> prefix binding.

        A snapshot of the mapping as it was before this binding is pushed
        on the context stack so that endPrefixMapping() can restore it.
        The pair is also queued so that the next startElementNS() writes
        the matching xmlns declaration.
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the mapping saved by the matching startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            # The value sits between double quotes, so '"' needs escaping
            # in addition to the usual markup characters.
            self._out.write(' %s="%s"' %
                            (attr_name, escape(value, _ATTR_ENTITIES)))
        self._out.write('>')

    def endElement(self, name):
        """Write an end tag (non-namespace mode)."""
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag for a namespace-aware element.

        name is a (uri, localname) pair; the prefix is looked up in the
        current mapping and the qname argument is not used.  Any prefix
        mappings started since the previous element are written as xmlns
        attributes on this tag.
        """
        self._out.write('<' + self._qname(name))

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # No prefix: this is the default namespace declaration.
                self._out.write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._out.write(' %s="%s"' % (self._qname(attr_name),
                                          escape(value, _ATTR_ENTITIES)))
        self._out.write('>')

    def endElementNS(self, name, qname):
        """Write an end tag for a namespace-aware element."""
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._out.write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._out.write('<?%s %s?>' % (target, data))


class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware callback; startElement() takes a
        # plain string name and no qname.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer is the result of this call, so it has to
        # be handed back to the parent reader.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Register this filter as every handler of the parent reader,
        then let the parent parse source."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent

# --- Utility functions

def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system id of a
    new InputSource, or an object with a read() method, which becomes
    the byte stream of a new InputSource (its name attribute, if any,
    is used as the system id).

    When the source has no byte stream, its system id is resolved
    against base: if that names an existing file the file is opened in
    binary mode, otherwise the id is joined to base as a URL and opened
    with urllib.  The caller is responsible for closing the stream.
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Test, record and open the same base-relative path, so that the
        # stream always matches the system id stored on the source.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source