# expatreader.py revision da204daeaa39019f2134166308c5b9cdfa84b84a
"""
SAX driver for the Pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *

# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# The string module is no longer used by this file (str methods are used
# instead); the import is kept so the module namespace stays unchanged.
import string
import weakref


def _split_ns_name(name):
    """Convert a name reported by a namespace-aware expat parser to a pair.

    The parser is created with " " as its namespace separator (see
    ExpatParser.reset), so the name is split on whitespace.  A name that
    does not split is returned as (None, name); otherwise the split parts
    are returned as a tuple.
    """
    parts = name.split()
    if len(parts) == 1:
        return (None, name)
    return tuple(parts)


# --- ExpatLocator

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """
    def __init__(self, parser):
        self._ref = weakref.ref(parser)

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None if no parser is live."
        parser = self._ref()
        if parser is None or parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 if no parser is live."
        parser = self._ref()
        if parser is None or parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the parser's current source, or None."
        parser = self._ref()
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the parser's current source, or None."
        parser = self._ref()
        if parser is None:
            return None
        return parser._source.getSystemId()


# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    "SAX driver for the Pyexpat C module."

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver; no expat parser exists until reset() runs.

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed through to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Give expat the source's system id as its base, when there is one."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        "Install the content handler, rebinding expat callbacks if parsing."
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of the namespaces feature.

        Raises SAXNotRecognizedException for any other feature name.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the namespaces feature.

        Raises SAXNotSupportedException while parsing and
        SAXNotRecognizedException for any other feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the lexical handler property.

        Raises SAXNotRecognizedException for any other property name.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the lexical handler property (None removes the handler).

        The expat callbacks are rebound immediately when a parse is in
        progress.  Raises SAXNotRecognizedException for any other name.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Hand a chunk of the document to expat.

        The first call of a parse resets the driver and reports
        startDocument().  An expat.error is converted to a
        SAXParseException and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        "Finish the document: final feed, endDocument(), drop the parser."
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal=1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        "Point expat's PI and character-data callbacks at the content handler."
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        "Bind, or clear, expat's comment and CDATA-section callbacks."
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            # The property was removed: unhook the callbacks instead of
            # failing on attribute access on None.
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        "Create a fresh expat parser and install every callback on it."
        if self._namespaces:
            # " " is the namespace separator; the *_ns callbacks split on it.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None if there is no parser."
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 if there is no parser."
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the current input source."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the current input source."
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        "expat callback: report startElement() with wrapped attributes."
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        "expat callback: report endElement()."
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat callback (namespace mode): report startElementNS().

        The element name and every attribute name are converted to pairs
        by _split_ns_name(); the qname argument is always None and the
        qname mapping passed to AttributesNSImpl is empty.
        """
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[_split_ns_name(aname)] = value

        self._cont_handler.startElementNS(_split_ns_name(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        "expat callback (namespace mode): report endElementNS()."
        self._cont_handler.endElementNS(_split_ns_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        "expat callback: report startPrefixMapping()."
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        "expat callback: report endPrefixMapping()."
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        "expat callback: forward to the DTD handler (base is dropped)."
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        "expat callback: forward to the DTD handler (base is dropped)."
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback: resolve and parse an external entity.

        The current parser/source pair is pushed on the entity stack and
        replaced by a sub-parser for the entity.  Returns 1 after a
        successful parse, 0 if parsing the entity raised anything.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # Deliberately broad: any failure is reported to expat as 0.
            # NOTE(review): on this path the saved parser/source pair stays
            # on the entity stack and is not restored.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

# ---

def create_parser(*args, **kwargs):
    "Return a new ExpatParser built from the given arguments."
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    import xml.sax
    p = create_parser()
    # XMLGenerator is provided by xml.sax.saxutils, imported above.
    p.setContentHandler(saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")