libxml.py revision e18fc185fa2604ba73f2b259e34796c106ad5545
1import libxml2mod
2
3#
4# Errors raised by the wrappers when some tree handling failed.
5#
class treeError(Exception):
    """Error raised by the wrappers when some tree handling failed.

    The message passed to the constructor is stored in the ``msg``
    attribute and is what ``str()`` returns.  The class derives from
    Exception so that it can be raised and caught like any other error.
    """

    def __init__(self, msg):
        # Initialise the Exception base as well so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
11
class parserError(Exception):
    """Error raised when parsing fails.

    The message passed to the constructor is stored in the ``msg``
    attribute and is what ``str()`` returns.  The class derives from
    Exception so that it can be raised and caught like any other error.
    """

    def __init__(self, msg):
        # Initialise the Exception base as well so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
17
class uriError(Exception):
    """Error type for URI handling failures.

    NOTE(review): nothing in this part of the file raises it; presumably
    the generated wrappers do -- confirm against the generated code.

    The message passed to the constructor is stored in the ``msg``
    attribute and is what ``str()`` returns.  The class derives from
    Exception so that it can be raised and caught like any other error.
    """

    def __init__(self, msg):
        # Initialise the Exception base as well so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
23
class xpathError(Exception):
    """Error type for XPath handling failures.

    NOTE(review): nothing in this part of the file raises it; presumably
    the generated wrappers do -- confirm against the generated code.

    The message passed to the constructor is stored in the ``msg``
    attribute and is what ``str()`` returns.  The class derives from
    Exception so that it can be raised and caught like any other error.
    """

    def __init__(self, msg):
        # Initialise the Exception base as well so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
29
class ioWrapper:
    """Adapter exposing a Python file-like object through io_* methods.

    The wrapped object is kept privately; once io_close() has been
    called it is dropped and every io_* method returns -1.  The ``_o``
    attribute is initialised to None here.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and forget the wrapped object.

        Returns 0 on success, -1 if there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 if there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read from the wrapped object.

        A negative len reads everything that is left, otherwise at most
        len units are requested.  Returns whatever the wrapped read()
        returns, or -1 if there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len = -1):
        """Write str to the wrapped object.

        A negative len writes the whole of str, otherwise only its first
        len items.  Returns whatever the wrapped write() returns, or -1
        if there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # File-like write() accepts a single argument, so the length
        # limit is applied by slicing instead of being passed along.
        return self.__io.write(str[:len])
61
class ioReadWrapper(ioWrapper):
    """ioWrapper bound to an input buffer created by libxml2mod.

    The buffer returned by libxml2mod.xmlCreateInputBuffer() is kept in
    ``_o`` and released with libxml2mod.xmlFreeParserInputBuffer() by
    close(), which is also run when the instance is destroyed.
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # NOTE(review): this trace looks like leftover debugging output;
        # it is kept so the observable behaviour does not change.
        print("__del__")
        self.close()

    def close(self):
        """Close the wrapped object and free the input buffer.

        Safe to call more than once: ``_o`` is reset to None so the
        buffer is only freed the first time.
        """
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
79
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to an output buffer created by libxml2mod.

    The buffer returned by libxml2mod.xmlCreateOutputBuffer() is kept in
    ``_o`` and released with libxml2mod.xmlOutputBufferClose() by
    close(), which is also run when the instance is destroyed.
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)

    def __del__(self):
        # NOTE(review): this trace looks like leftover debugging output;
        # it is kept so the observable behaviour does not change.
        print("__del__")
        self.close()

    def close(self):
        """Close the wrapped object and then the output buffer.

        Safe to call more than once: ``_o`` is reset to None so the
        buffer is only closed the first time.
        """
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
97
98#
99# Example of a class to handle SAX events
100#
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except warning(), which prints the
    message, and error()/fatalError(), which raise parserError.
    Subclasses override the callbacks they are interested in.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callbacks
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when a CDATA section has been read, data is the string
           containing the data, multiple consecutive cdataBlock() callbacks
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This callback used to be a second definition of entityDecl(), which
    # silently replaced the five-argument version declared above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """called on a parser warning, the message is printed"""
        print(msg)

    def error(self, msg):
        """called on a parser error, raises parserError with the message"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called on a fatal parser error, raises parserError with the
           message"""
        raise parserError(msg)
198
199#
# This class is the ancestor of all the Node classes. It provides
# the basic functionality shared by all nodes (and handles
# exceptions gracefully), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
204#
class xmlCore:
    """Ancestor of the node classes, wrapping a libxml2mod object.

    The wrapped object is stored in ``_o``.  Tree navigation, name,
    type, content and owner document are available both as virtual
    attributes (node.parent, node.children, ...) and through the
    matching get_*() methods; serialization and XPath evaluation
    helpers are provided as well.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def __getattr__(self, attr):
        """Resolve the virtual attributes through the get_*() methods.

        Only called when the normal attribute lookup fails; any name
        which is not a virtual attribute raises AttributeError.
        """
        if attr in ("parent", "properties", "children", "last", "next",
                    "prev", "content", "name", "type", "doc"):
            return getattr(self, "get_" + attr)()
        raise AttributeError(attr)

        #
        # Those are common attributes to nearly all type of nodes
        #
    def get_parent(self):
        """Return the parent wrapped as an xmlNode, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_children(self):
        """Return the first child wrapped as an xmlNode, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_last(self):
        """Return the last child wrapped as an xmlNode, or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_next(self):
        """Return the next sibling wrapped as an xmlNode, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_properties(self):
        """Return the first property wrapped as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)
    def get_prev(self):
        """Return the previous sibling wrapped as an xmlNode, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_content(self):
        """Return the result of libxml2mod.xmlNodeGetContent()."""
        return libxml2mod.xmlNodeGetContent(self._o)
    def getContent(self):
        """Same as get_content()."""
        return libxml2mod.xmlNodeGetContent(self._o)
    def get_name(self):
        """Return the result of libxml2mod.name() for this node."""
        return libxml2mod.name(self._o)
    def get_type(self):
        """Return the result of libxml2mod.type() for this node."""
        return libxml2mod.type(self._o)
    def get_doc(self):
        """Return the owner document wrapped as an xmlDoc, or None.

        A node which is itself a document has no owner document; it is
        then returned wrapped as an xmlDoc, exactly like the ``doc``
        attribute does.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type == "document_xml" or self.type == "document_html":
                return xmlDoc(_obj=self._o)
            return None
        return xmlDoc(_obj=ret)
    def free(self):
        """Pass the wrapped object to libxml2mod.freeDoc()."""
        libxml2mod.freeDoc(self._o)

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        """Return the result of libxml2mod.serializeNode()."""
        return libxml2mod.serializeNode(self._o, encoding, format)
    def saveTo(self, file, encoding = None, format = 0):
        """Return the result of libxml2mod.saveNodeTo() on file."""
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate expr with this node as the XPath context node.

        Returns None when the node has no document, otherwise the
        result of the evaluation.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Free the context even when the evaluation raises.
            ctxt.xpathFreeContext()
337
338#
339# converters to present a nicer view of the XPath returns
340#
def nodeWrap(o):
    """Wrap the raw libxml2mod object o in a Python node class.

    The class is selected from the type string reported by
    libxml2mod.type(); any type without a dedicated class is wrapped
    as a plain xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    # The dispatch has to use the node type ("element", "document_xml",
    # ...), not the node name, which is user data.
    nodeType = libxml2mod.type(o)
    if nodeType == "element" or nodeType == "text":
        return xmlNode(_obj=o)
    if nodeType == "attribute":
        return xmlAttr(_obj=o)
    # Matches both "document_xml" and "document_html".
    if nodeType[0:8] == "document":
        return xmlDoc(_obj=o)
    if nodeType == "namespace":
        return xmlNs(_obj=o)
    if nodeType == "elem_decl":
        return xmlElement(_obj=o)
    if nodeType == "attribute_decl":
        return xmlAttribute(_obj=o)
    if nodeType == "entity_decl":
        return xmlEntity(_obj=o)
    if nodeType == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
361
def xpathObjectRet(o):
    """Convert an XPath result to its Python form.

    A list or tuple is returned as a new list with every item passed
    through nodeWrap(); any other value is returned unchanged.
    """
    if isinstance(o, (list, tuple)):
        # Build a real list: map() is lazy on Python 3.
        return [nodeWrap(item) for item in o]
    return o
367
368#
369# register an XPath function
370#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable f as an XPath function.

    The arguments are passed unchanged to
    libxml2mod.xmlRegisterXPathFunction() and its result is returned so
    that callers can check the outcome (it used to be discarded).
    NOTE(review): the result is presumably an integer status -- confirm
    against the C module.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
373
374
375#
376# For the xmlTextReader parser configuration
377#
# NOTE(review): these values presumably mirror the parser property
# numbers used by the C xmlTextReader API -- confirm before changing.
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
PARSER_SUBST_ENTITIES = 4
382
383#
384# Everything below this point is automatically generated
385#
386
387