# libxml.py revision 87ab1c129868978f1806da36496c8c519852c6ad
import libxml2mod
import types

#
# Errors raised by the wrappers when some tree handling failed.
#
class libxmlError(Exception):
    """Common base class of the errors defined in this module.

    The message given at construction time is stored in the msg
    attribute and is what str() returns."""
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed."""

class parserError(libxmlError):
    """Parsing error; raised by SAXCallback.error() and fatalError()."""

class uriError(libxmlError):
    """URI related error (no raise site in the hand-written part)."""

class xpathError(libxmlError):
    """XPath related error (no raise site in the hand-written part)."""

class ioWrapper:
    """Adapter around a Python file-like object exposing io_close(),
    io_flush(), io_read() and io_write().

    Every io_* method returns -1 once the wrapped object has been
    released (or when none was given)."""
    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and forget the wrapped object, returns 0 on success."""
        if self.__io is None:
            return(-1)
        self.__io.close()
        self.__io = None
        return(0)

    def io_flush(self):
        """Flush the wrapped object, returns 0 on success."""
        if self.__io is None:
            return(-1)
        self.__io.flush()
        return(0)

    def io_read(self, len = -1):
        """Read up to len items from the wrapped object, or everything
        when len is negative."""
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.read())
        return(self.__io.read(len))

    def io_write(self, str, len = -1):
        """Write str to the wrapped object and return the result of its
        write() method."""
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.write(str))
        # NOTE(review): write() is called with two arguments here while
        # standard file objects only accept one -- confirm which kind of
        # object is expected when len is given.
        return(self.__io.write(str, len))

class ioReadWrapper(ioWrapper):
    """Input wrapper: associates a parser input buffer, created with
    libxml2mod.xmlCreateInputBuffer(), to a Python file-like object."""
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def close(self):
        """Close the wrapped object and free the input buffer."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

class ioWriteWrapper(ioWrapper):
    """Output wrapper built from a string, a class instance or an
    already existing output buffer object."""
    def __init__(self, _obj, enc = ""):
#        print "ioWriteWrapper.__init__", _obj
        if type(_obj) == type(''):
            print("write io from a string")
            # no I/O object and no buffer in that case, but the base
            # attributes must exist since __del__(), flush() and close()
            # read them
            ioWrapper.__init__(self, None)
        elif type(_obj) == types.InstanceType:
            print("write io from instance of %s" % (_obj.__class__))
            ioWrapper.__init__(self, _obj)
            self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
        else:
            pyfile = libxml2mod.outputBufferGetPythonFile(_obj)
            if pyfile is not None:
                ioWrapper.__init__(self, pyfile)
            else:
                ioWrapper.__init__(self, _obj)
            # must come after ioWrapper.__init__() which resets _o
            self._o = _obj

    def __del__(self):
#        print "__del__"
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        # NOTE(review): this closes the output buffer instead of only
        # flushing it, exactly like close() -- confirm this is intended.
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the wrapped object and close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

#
# Example of a class to handle SAX events
#
class SAXCallback:
    """Base class for SAX handlers"""
    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second definition of entityDecl() which
    # silently replaced the one above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicate the associated NOTATION"""
        pass

    def warning(self, msg):
        """print the warning message"""
        print(msg)

    def error(self, msg):
        """raise a parserError carrying the message"""
        raise parserError(msg)

    def fatalError(self, msg):
        """raise a parserError carrying the message"""
        raise parserError(msg)

#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and handle
# gracefully the exception), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
#
class xmlCore:
    def __init__(self, _obj=None):
        if _obj is not None:
            self._o = _obj
            return
        self._o = None
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)
    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?
    def get_name(self):
        return libxml2mod.name(self._o)
    def get_type(self):
        return libxml2mod.type(self._o)
    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret is None:
            # a document node is its own document
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)
    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    if sys.version_info[:2] < (2, 2):
        # no property() there, emulate the attributes by dispatching
        # to the get_xxx() methods above
        def __getattr__(self, attr):
            if attr in ("parent", "properties", "children", "last", "next",
                        "prev", "content", "name", "type", "doc"):
                return getattr(self, "get_" + attr)()
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properies")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)
    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        # free the context even when the evaluation raises
        try:
            ctxt.setContextNode(self)
            res = ctxt.xpathEval(expr)
        finally:
            ctxt.xpathFreeContext()
        return res

#    #
#    # Selecting nodes using XPath, faster because the context
#    # is allocated just once per xmlDoc.
#    #
#    # Removed: DV memleaks c.f. #126735
#    #
#    def xpathEval2(self, expr):
#        doc = self.doc
#        if doc == None:
#            return None
#        try:
#            doc._ctxt.setContextNode(self)
#        except:
#            doc._ctxt = doc.xpathNewContext()
#            doc._ctxt.setContextNode(self)
#        res = doc._ctxt.xpathEval(expr)
#        return res
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # support for python2 iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)
    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        # Best effort: release the XPath context cached on the document
        # if there is one; any failure (e.g. no _ctxt attribute) is
        # deliberately ignored.
        try:
            self.doc._ctxt.xpathFreeContext()
        except:
            pass
        # NOTE(review): xmlFreeDoc() is handed self._o whatever the node
        # type is -- presumably only meant to be called on documents.
        libxml2mod.xmlFreeDoc(self._o)


#
# implements the depth-first iterator for libxml2 DOM tree
#
class xmlCoreDepthFirstItertor:
    def __init__(self, node):
        self.node = node
        self.parents = []
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next

#
# implements the breadth-first iterator for libxml2 DOM tree
#
# NOTE(review): pending nodes are popped from the end of the list, so
# the children of the last sibling are visited first -- confirm that
# this is the intended traversal order.
#
class xmlCoreBreadthFirstItertor:
    def __init__(self, node):
        self.node = node
        self.parents = []
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.children

#
# converters to present a nicer view of the XPath returns
#
def nodeWrap(o):
    """Wrap the low-level node o in a wrapper class picked from its
    node type, xmlNode being the fallback."""
    # TODO try to cast to the most appropriate node class
    # dispatch on the node type, not on its name: the strings below are
    # node types (xmlCore.get_doc() tests the same "document_*" values
    # against libxml2mod.type())
    kind = libxml2mod.type(o)
    if kind == "element" or kind == "text":
        return xmlNode(_obj=o)
    if kind == "attribute":
        return xmlAttr(_obj=o)
    if kind[0:8] == "document":
        return xmlDoc(_obj=o)
    if kind == "namespace":
        return xmlNs(_obj=o)
    if kind == "elem_decl":
        return xmlElement(_obj=o)
    if kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    if kind == "entity_decl":
        return xmlEntity(_obj=o)
    if kind == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)

def xpathObjectRet(o):
    """Turn a list or tuple of low-level nodes into a list of wrapped
    nodes, any other value is returned unchanged."""
    if type(o) == type([]) or type(o) == type(()):
        return [nodeWrap(x) for x in o]
    return o

#
# register an XPath function
#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python function f as the XPath function name in the
    namespace ns_uri for the context ctxt. Returns the value handed
    back by libxml2mod.xmlRegisterXPathFunction() (it used to be
    computed then dropped)."""
    ret = libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
    return ret

#
# For the xmlTextReader parser configuration
#
PARSER_LOADDTD=1
PARSER_DEFAULTATTRS=2
PARSER_VALIDATE=3
PARSER_SUBST_ENTITIES=4

#
# For the error callback severities
#
PARSER_SEVERITY_VALIDITY_WARNING=1
PARSER_SEVERITY_VALIDITY_ERROR=2
PARSER_SEVERITY_WARNING=3
PARSER_SEVERITY_ERROR=4

#
# register the libxml2 error handler
#
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error). """
    import sys
    # has_key() is kept on purpose: this module still caters for
    # interpreters older than 2.2 (see xmlCore) where dictionaries do
    # not support the "in" operator
    if not sys.modules.has_key('libxslt'):
        # normal behaviour when libxslt is not imported
        ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
    else:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        ret = libxslt.registerErrorHandler(f,ctx)
    return ret

class parserCtxtCore:

    def __init__(self, _obj=None):
        if _obj is not None:
            self._o = _obj
            return
        self._o = None

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeParserCtxt(self._o)
        self._o = None

    def setErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o,f,arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)


def _xmlTextReaderErrorFunc(handler,msg,severity,locator):
    """Intermediate callback to wrap the locator"""
    # handler is the (f,arg) pair registered by
    # xmlTextReaderCore.SetErrorHandler()
    f,arg = handler
    return f(arg,msg,severity,xmlTextReaderLocator(locator))

class xmlTextReaderCore:

    def __init__(self, _obj=None):
        self.input = None
        if _obj is not None:
            self._o = _obj
            return
        self._o = None

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeTextReader(self._o)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            libxml2mod.xmlTextReaderSetErrorHandler(\
                self._o,None,None)
        else:
            libxml2mod.xmlTextReaderSetErrorHandler(\
                self._o,_xmlTextReaderErrorFunc,(f,arg))

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f,arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None,None
        else:
            # assert f is _xmlTextReaderErrorFunc
            # arg is the (f,arg) pair stored by SetErrorHandler()
            return arg


# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
#
# Everything before this line comes from libxml.py
# Everything after this line is automatically generated
#
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
