libxml.py revision 5439624bd9167dbb880ea4a75d91677d20a6ebe3
import libxml2mod

#
# Errors raised by the wrappers when some tree handling failed.
#
class treeError(Exception):
    """Error raised by the wrappers when some tree handling failed.

    The message passed to the constructor is kept in ``msg`` and is what
    ``str()`` returns.  Deriving from Exception keeps the class raisable
    on every Python version and catchable with ``except Exception``.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg


class parserError(Exception):
    """Error carrying a parser message.

    Raised by SAXCallback.error() and SAXCallback.fatalError(); the
    message is kept in ``msg`` and returned by ``str()``.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg


class uriError(Exception):
    """Error carrying a message in ``msg``.

    NOTE(review): presumably raised by the URI wrappers; nothing in this
    part of the file raises it.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg


class xpathError(Exception):
    """Error carrying a message in ``msg``.

    NOTE(review): presumably raised by the XPath wrappers; nothing in this
    part of the file raises it.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg


class ioWrapper:
    """Adapter exposing io_close/io_flush/io_read/io_write on top of an
    object providing close(), flush(), read() and write().

    Every io_* method returns -1 once the wrapped object has been closed
    through io_close().
    """
    def __init__(self, _obj):
        self.__io = _obj    # wrapped I/O object, None once closed
        self._o = None      # low-level buffer, set by the subclasses

    def io_close(self):
        """Close the wrapped object; return 0, or -1 if already closed."""
        if self.__io is None:
            return(-1)
        self.__io.close()
        self.__io = None
        return(0)

    def io_flush(self):
        """Flush the wrapped object; return 0, or -1 if already closed."""
        if self.__io is None:
            return(-1)
        self.__io.flush()
        return(0)

    def io_read(self, len = -1):
        """Read from the wrapped object.

        A negative len reads everything the object returns from read(),
        otherwise at most len units are requested.  Returns -1 if the
        wrapper has been closed.
        """
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.read())
        return(self.__io.read(len))

    def io_write(self, str, len = -1):
        """Write str to the wrapped object and return what write() returns.

        Returns -1 if the wrapper has been closed.
        """
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.write(str))
        # NOTE(review): len is forwarded as a second argument to write();
        # plain file objects only accept one -- confirm the wrapped
        # object supports this form.
        return(self.__io.write(str, len))


class ioReadWrapper(ioWrapper):
    """ioWrapper bound to a parser input buffer created with
    libxml2mod.xmlCreateInputBuffer()."""
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # same cleanup as close(); the former debug print was removed
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def close(self):
        """Close the wrapped object and free the input buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None


class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to an output buffer created with
    libxml2mod.xmlCreateOutputBuffer()."""
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)

    def __del__(self):
        # same cleanup as close(); the former debug print was removed
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Close the wrapped object and close the output buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

#
# Example of a class to handle SAX events
#
class SAXCallback:
    """Base class for SAX handlers"""
    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This method used to be a second definition of entityDecl(), which
    # silently replaced the five-argument one above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """print the warning message on the standard output"""
        print(msg)

    def error(self, msg):
        """raise a parserError carrying msg"""
        raise parserError(msg)

    def fatalError(self, msg):
        """raise a parserError carrying msg"""
        raise parserError(msg)

#
# This class is the ancestor of all the Node classes.
# It provides
# the basic functionalities shared by all nodes (and handles
# gracefully the exception), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
#
class xmlCore:
    """Common ancestor of the node wrapper classes.

    The wrapped low-level object is kept in ``_o`` (None when the wrapper
    is unbound) and is handed to the libxml2mod accessor functions.
    """
    def __init__(self, _obj=None):
        self._o = _obj

    def get_parent(self):
        """Return the parent wrapped as an xmlNode, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_children(self):
        """Return the first child wrapped as an xmlNode, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_last(self):
        """Return what libxml2mod.last() gives, wrapped as an xmlNode,
        or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_next(self):
        """Return the next sibling wrapped as an xmlNode, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_properties(self):
        """Return the properties wrapped as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        """Return the previous sibling wrapped as an xmlNode, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_content(self):
        """Return the result of libxml2mod.xmlNodeGetContent()."""
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?

    def get_name(self):
        """Return the node name."""
        return libxml2mod.name(self._o)

    def get_type(self):
        """Return the node type string."""
        return libxml2mod.type(self._o)

    def get_doc(self):
        """Return the owning document wrapped as an xmlDoc.

        When the node has no document reference but is itself of type
        "document_xml" or "document_html" it is wrapped as the document;
        otherwise None is returned.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)

    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    # version_info compares numerically; slicing sys.version and converting
    # to float misreads two-digit minor versions.
    if sys.version_info < (2, 2):
        def __getattr__(self, attr):
            # no property() before Python 2.2: route the attribute names
            # to the matching get_* accessor by hand
            if attr in ("parent", "properties", "children", "last", "next",
                        "prev", "content", "name", "type", "doc"):
                return getattr(self, "get_" + attr)()
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properties")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        """Return the result of libxml2mod.serializeNode() for this node."""
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding = None, format = 0):
        """Save this node to file through libxml2mod.saveNodeTo() and
        return its result."""
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate expr with this node as context node.

        Returns None when the node has no document.  A fresh XPath
        context is created for the call and always freed afterwards.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # release the context even when the evaluation raises
            ctxt.xpathFreeContext()

    #
    # Selecting nodes using XPath, faster because the context
    # is allocated just once per xmlDoc.
    #
    def xpathEval2(self, expr):
        """Evaluate expr with this node as context node, reusing the
        context stored in ``doc._ctxt`` when there is one.

        Returns None when the node has no document.
        """
        doc = self.doc
        if doc is None:
            return None
        try:
            doc._ctxt.setContextNode(self)
        except:
            # deliberately broad: any failure (typically no _ctxt yet)
            # falls back to creating a new context
            doc._ctxt = doc.xpathNewContext()
            doc._ctxt.setContextNode(self)
        res = doc._ctxt.xpathEval(expr)
        return res

    # support for python2 iterators
    def walk_depth_first(self):
        """Return a depth-first iterator starting at this node."""
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        """Return an xmlCoreBreadthFirstItertor starting at this node."""
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        """Free the XPath context stored on the document, if any, then
        call libxml2mod.freeDoc() on the wrapped object."""
        try:
            self.doc._ctxt.xpathFreeContext()
        except:
            # best effort: there may be no document or no stored context
            pass
        libxml2mod.freeDoc(self._o)


#
# implements the depth-first iterator for libxml2 DOM tree
#
class xmlCoreDepthFirstItertor:
    """Iterator yielding a node, then its children, then its following
    siblings, using the ``children`` and ``next`` attributes."""
    def __init__(self, node):
        self.node = node
        self.parents = []   # stack of nodes already returned

    def __iter__(self):
        return self

    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next
    __next__ = next     # Python 3 spelling of the iterator protocol

#
# implements the breadth-first iterator for libxml2 DOM tree
#
class xmlCoreBreadthFirstItertor:
    """Iterator yielding a node, then its following siblings, then the
    children of the nodes already returned (most recent first)."""
    def __init__(self, node):
        self.node = node
        self.parents = []   # stack of nodes already returned

    def __iter__(self):
        return self

    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.children
    __next__ = next     # Python 3 spelling of the iterator protocol

#
# converters to present a nicer view of the XPath returns
#
def nodeWrap(o):
    """Wrap the low-level object o in the wrapper class matching its
    node type; unknown types are wrapped as xmlNode."""
    # TODO try to cast to the most appropriate node class
    # The strings below are node types, so the dispatch must use type():
    # name() returns the node's own name.
    name = libxml2mod.type(o)
    if name == "element" or name == "text":
        return xmlNode(_obj=o)
    if name == "attribute":
        return xmlAttr(_obj=o)
    if name[0:8] == "document":
        return xmlDoc(_obj=o)
    if name == "namespace":
        return xmlNs(_obj=o)
    if name == "elem_decl":
        return xmlElement(_obj=o)
    if name == "attribute_decl":
        # NOTE(review): xmlAttribute is expected to come from the
        # generated part of the module -- confirm the class name.
        return xmlAttribute(_obj=o)
    if name == "entity_decl":
        return xmlEntity(_obj=o)
    if name == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)

def xpathObjectRet(o):
    """Return a list of wrapped nodes when o is a list or tuple,
    otherwise return o unchanged."""
    if isinstance(o, (list, tuple)):
        return [nodeWrap(x) for x in o]
    return o

#
# register an XPath function
#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register f as the XPath function name in namespace ns_uri on ctxt
    through libxml2mod.xmlRegisterXPathFunction().  Returns None."""
    libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)

#
# For the xmlTextReader parser configuration
#
PARSER_LOADDTD=1
PARSER_DEFAULTATTRS=2
PARSER_VALIDATE=3
PARSER_SUBST_ENTITIES=4

#
# For the error callback severities
#
PARSER_SEVERITY_VALIDITY_WARNING=1
PARSER_SEVERITY_VALIDITY_ERROR=2
PARSER_SEVERITY_WARNING=3
PARSER_SEVERITY_ERROR=4

#
# register the libxml2 error handler
#
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error). """
    import sys
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
    else:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        ret = libxslt.registerErrorHandler(f,ctx)
    return ret

class parserCtxtCore:
    """Base wrapper for a parser context; the low-level context is kept
    in ``_o`` and freed with libxml2mod.xmlFreeParserCtxt() on deletion."""

    def __init__(self, _obj=None):
        self._o = _obj

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeParserCtxt(self._o)
        self._o = None

    def setErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o,f,arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)


def _xmlTextReaderErrorFunc(f_arg,msg,severity,locator):
    """Intermediate callback to wrap the locator"""
    # f_arg is the (f, arg) pair registered by SetErrorHandler; it is
    # unpacked here because tuple parameters are Python 2 only syntax
    f, arg = f_arg
    return f(arg,msg,severity,xmlTextReaderLocator(locator))

class xmlTextReaderCore:
    """Base wrapper for a text reader; the low-level reader is kept in
    ``_o`` and freed with libxml2mod.xmlFreeTextReader() on deletion."""

    def __init__(self, _obj=None):
        self.input = None
        self._o = _obj

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeTextReader(self._o)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o,None,None)
        else:
            # the real handler goes through _xmlTextReaderErrorFunc, with
            # (f,arg) stored as its callback argument
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o,_xmlTextReaderErrorFunc,(f,arg))

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f,arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None,None
        else:
            # assert f is _xmlTextReaderErrorFunc
            # arg is the (f,arg) pair stored by SetErrorHandler
            return arg

# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
#
# Everything before this line comes from libxml.py
# Everything after this line is automatically generated
#
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
