# expatreader.py revision e3c37d660f5641f55c12313fde8e20f8178d942a
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *

# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# Kept for backward compatibility with code that reaches this module's
# namespace; the driver itself now uses string methods instead.
import string


def _split_qname(name):
    """Split a name reported by a namespace-aware expat parser.

    The parser is created with " " as namespace separator (see
    ExpatParser.reset), so a qualified name arrives as "uri localname".
    A name without separator is returned as (None, name); otherwise the
    whitespace-separated parts are returned as a tuple.
    """
    parts = name.split()
    if len(parts) == 1:
        return (None, name)
    return tuple(parts)


# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    "SAX driver for the Pyexpat C module."

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create the driver.

        namespaceHandling -- true to report startElementNS/endElementNS
                             events instead of startElement/endElement.
        bufsize           -- passed on to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # (parser, source) pairs of the entities enclosing the one that is
        # currently being parsed; see external_entity_ref.
        self._entity_stack = []

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Tell expat the base to use, if the source has a system id."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        "Install a content handler; takes effect at once while parsing."
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            # Some expat callbacks point straight at the content handler,
            # so they have to be rebound to the new object.
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature 'name'.

        Only handler.feature_namespaces is known; any other name raises
        SAXNotRecognizedException.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature 'name' to 'state'.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for anything but
        handler.feature_namespaces.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the value of property 'name'.

        Only handler.property_lexical_handler is known; any other name
        raises SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property 'name' to 'value'.

        Only handler.property_lexical_handler is known; any other name
        raises SAXNotRecognizedException.  The lexical handler may be
        changed (or cleared with None) while parsing.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Hand a chunk of the document to expat.

        The first chunk of a document resets the driver and fires
        startDocument.  expat errors are converted to SAXParseException
        and reported through the error handler's fatalError.
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        "Finish the document: final feed, endDocument, drop the parser."
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        "Bind the expat callbacks that go directly to the content handler."
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind the expat lexical callbacks to the lexical handler.

        When the property has been cleared (None) the callbacks are
        uninstalled instead of dereferencing None.
        """
        lex = self._lex_handler_prop
        if lex is None:
            self._parser.CommentHandler = None
            self._parser.StartCdataSectionHandler = None
            self._parser.EndCdataSectionHandler = None
        else:
            self._parser.CommentHandler = lex.comment
            self._parser.StartCdataSectionHandler = lex.startCDATA
            self._parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        "Create a fresh expat parser and install all callbacks on it."
        if self._namespaces:
            # " " is the separator expat puts between URI and local name;
            # _split_qname relies on it.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None when there is no parser."
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 when there is no parser."
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the source being parsed."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the source being parsed."
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        "Report a start tag with (uri, localname) element/attribute names."
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[_split_qname(aname)] = value

        # The qname (second argument) is not available, hence None.
        self._cont_handler.startElementNS(_split_qname(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        "Report an end tag with a (uri, localname) element name."
        self._cont_handler.endElementNS(_split_qname(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # SAX wants the public id before the system id; 'base' is unused.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        # SAX wants the public id before the system id; 'base' is unused.
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        The entity resolver supplies the source.  The current parser and
        source are pushed on the entity stack, the entity is parsed, and
        they are restored afterwards.  Returns 1 on success and 0 if
        parsing the entity raised.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): deliberately catches everything so that no
            # exception travels through the expat callback.  The entity
            # stack is not popped on this path - confirm whether the
            # enclosing parser should be restored before returning.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

# ---

def create_parser(*args, **kwargs):
    "Return a new ExpatParser built from the given arguments."
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    import xml.sax
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself.
    p.setContentHandler(saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")