libxml.py revision e18fc185fa2604ba73f2b259e34796c106ad5545
import libxml2mod


#
# Errors raised by the wrappers when some tree handling failed.
#
class libxmlError(Exception):
    """Common base class of the errors defined by this module.

    The error classes derive from Exception so that they can be raised
    on interpreters that refuse to raise arbitrary class instances.
    The message is kept in the msg attribute and is also what str()
    returns.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg


class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed."""


class parserError(libxmlError):
    """Parsing error; raised by SAXCallback.error() and fatalError()."""


class uriError(libxmlError):
    """Error type exported for URI related failures."""


class xpathError(libxmlError):
    """Error type exported for XPath related failures."""


class ioWrapper:
    """Adapter exposing a Python file-like object through io_* methods.

    _obj is the wrapped object; it must provide close(), flush(),
    read() and write().  Every io_* method returns -1 once the wrapper
    has been closed.  The _o attribute is reserved for the libxml2
    buffer handle set up by the subclasses.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close the wrapped object; return 0, or -1 if already closed."""
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object; return 0, or -1 if closed."""
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read from the wrapped object.

        A negative len reads everything available, otherwise at most
        len units are requested.  Returns what read() returned, or -1
        if the wrapper is closed.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len = -1):
        """Write str to the wrapped object.

        A negative len writes the whole string, otherwise only the
        first len characters are written.  Returns what write()
        returned, or -1 if the wrapper is closed.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # File-like write() takes a single argument: truncate the data
        # here instead of passing the length as a second argument.
        return self.__io.write(str[:len])


class ioReadWrapper(ioWrapper):
    """ioWrapper registered with libxml2 as a parser input buffer.

    The handle returned by libxml2mod.xmlCreateInputBuffer() is kept
    in _o and released by close().
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Trace output kept from the original implementation.
        print("__del__")
        self.close()

    def close(self):
        """Close the wrapped object and free the input buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None


class ioWriteWrapper(ioWrapper):
    """ioWrapper registered with libxml2 as an output buffer.

    The handle returned by libxml2mod.xmlCreateOutputBuffer() is kept
    in _o and closed by close().
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)

    def __del__(self):
        # Trace output kept from the original implementation.
        print("__del__")
        self.close()

    def close(self):
        """Close the wrapped object and the output buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None


#
# Example of a class to handle SAX events
#
class SAXCallback:
    """Base class for SAX handlers"""

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when an NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second definition of entityDecl(), which
    # silently replaced the five argument version declared above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicate the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with the message of a warning, which is printed"""
        print(msg)

    def error(self, msg):
        """called with the message of an error, raises parserError"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with the message of a fatal error, raises parserError"""
        raise parserError(msg)


#
# This class is the ancestor of all the Node classes.
# It provides the basic functionalities shared by all nodes (and handles
# gracefully the exception), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
#
class xmlCore:
    """Ancestor of the node classes, wrapping a libxml2mod object in _o.

    The tree links and basic properties are available both through the
    get_*() methods and as the read-only attributes parent, properties,
    children, last, next, prev, content, name, type and doc.
    """

    # Attributes computed on access, mapped to the method providing them.
    _ATTR_GETTERS = {
        "parent": "get_parent",
        "properties": "get_properties",
        "children": "get_children",
        "last": "get_last",
        "next": "get_next",
        "prev": "get_prev",
        "content": "get_content",
        "name": "get_name",
        "type": "get_type",
        "doc": "get_doc",
    }

    def __init__(self, _obj=None):
        self._o = _obj

    def __getattr__(self, attr):
        """Resolve the computed attributes; anything else is an
        AttributeError."""
        getter = self._ATTR_GETTERS.get(attr)
        if getter is None:
            raise AttributeError(attr)
        return getattr(self, getter)()

    #
    # Those are common attributes to nearly all type of nodes
    #
    def get_parent(self):
        """Return the parent as an xmlNode, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_children(self):
        """Return the first child as an xmlNode, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_last(self):
        """Return the last child as an xmlNode, or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_next(self):
        """Return the next sibling as an xmlNode, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_prev(self):
        """Return the previous sibling as an xmlNode, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_properties(self):
        """Return the first property as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_doc(self):
        """Return the owning document as an xmlDoc, or None.

        When libxml2mod reports no document and the node itself is a
        document, the node is returned wrapped as an xmlDoc; this is
        the behaviour the doc attribute always had.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ("document_xml", "document_html"):
                return xmlDoc(_obj=self._o)
            return None
        return xmlDoc(_obj=ret)

    def get_content(self):
        """Return the result of xmlNodeGetContent() for this node."""
        return libxml2mod.xmlNodeGetContent(self._o)

    def getContent(self):
        """Same as get_content()."""
        return libxml2mod.xmlNodeGetContent(self._o)

    def get_name(self):
        """Return the name reported by libxml2mod for this node."""
        return libxml2mod.name(self._o)

    def get_type(self):
        """Return the type string reported by libxml2mod for this node."""
        return libxml2mod.type(self._o)

    def free(self):
        """Pass the wrapped object to libxml2mod.freeDoc()."""
        # NOTE(review): presumably only meaningful on document nodes,
        # confirm against the callers.
        libxml2mod.freeDoc(self._o)

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        """Return the node serialized by libxml2mod.serializeNode()."""
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding = None, format = 0):
        """Save the node to file through libxml2mod.saveNodeTo()."""
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate the XPath expression expr with this node as context.

        Returns None when the node has no document, else the result of
        the evaluation.  The temporary context is freed even when the
        evaluation raises.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            ctxt.xpathFreeContext()


#
# converters to present a nicer view of the XPath returns
#
def nodeWrap(o):
    """Wrap the libxml2mod object o in the class matching its type.

    Types without a dedicated class are wrapped as xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    # NOTE(review): the wrapper classes are expected to come from the
    # generated part of this module; confirm xmlAttribute and xmlDtd
    # are defined there.
    # Dispatch on the node type: the strings below are type strings,
    # the node name is chosen by the document author.
    kind = libxml2mod.type(o)
    if kind == "element" or kind == "text":
        return xmlNode(_obj=o)
    if kind == "attribute":
        return xmlAttr(_obj=o)
    if kind[0:8] == "document":
        return xmlDoc(_obj=o)
    if kind[0:9] == "namespace":
        return xmlNs(_obj=o)
    if kind == "elem_decl":
        return xmlElement(_obj=o)
    if kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    if kind == "entity_decl":
        return xmlEntity(_obj=o)
    if kind == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)


def xpathObjectRet(o):
    """Convert an XPath result: a list or tuple becomes a list of
    wrapped nodes, any other value is returned unchanged."""
    if isinstance(o, (list, tuple)):
        return [nodeWrap(item) for item in o]
    return o


#
# register an XPath function
#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register f as the XPath function name in namespace ns_uri for
    ctxt; the value returned by libxml2mod is discarded."""
    libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)


#
# For the xmlTextReader parser configuration
#
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
PARSER_SUBST_ENTITIES = 4

#
# Everything below this point is automatically generated
#