libxml.py revision 5439624bd9167dbb880ea4a75d91677d20a6ebe3
1import libxml2mod
2
3#
4# Errors raised by the wrappers when some tree handling failed.
5#
class treeError(Exception):
    """Error raised by the wrappers when some tree handling failed.

    The message is kept in the ``msg`` attribute and is what str() returns.
    """
    def __init__(self, msg):
        # Initialise the base class so that ``args`` is populated too.
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
11
class parserError(Exception):
    """Error carrying a parser message.

    The message is kept in the ``msg`` attribute and is what str() returns.
    """
    def __init__(self, msg):
        # Initialise the base class so that ``args`` is populated too.
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
17
class uriError(Exception):
    """Error carrying a message; judging by its name it is meant for URI
    handling failures (it is not raised in this part of the file).

    The message is kept in the ``msg`` attribute and is what str() returns.
    """
    def __init__(self, msg):
        # Initialise the base class so that ``args`` is populated too.
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
23
class xpathError(Exception):
    """Error carrying a message; judging by its name it is meant for XPath
    failures (it is not raised in this part of the file).

    The message is kept in the ``msg`` attribute and is what str() returns.
    """
    def __init__(self, msg):
        # Initialise the base class so that ``args`` is populated too.
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
29
class ioWrapper:
    """Thin adapter around a Python file-like object.

    The wrapped object must provide close(), flush(), read() and write().
    Every io_* method returns -1 once the wrapper has been closed.
    """
    def __init__(self, _obj):
        self.__io = _obj      # wrapped file-like object, None once closed
        self._o = None        # slot for a low-level buffer, set by subclasses

    def io_close(self):
        """Close the wrapped object; return 0, or -1 if already closed."""
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object; return 0, or -1 if closed."""
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read up to `len` units, or everything when `len` is negative.

        Returns whatever the wrapped read() returns, or -1 if closed.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len = -1):
        """Write `str`, limited to its first `len` items when `len` >= 0.

        Returns whatever the wrapped write() returns, or -1 if closed.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # File-like write() takes a single argument, so the length limit
        # has to be applied by slicing rather than passed along.
        return self.__io.write(str[0:len])
61
class ioReadWrapper(ioWrapper):
    """ioWrapper bound to an input buffer created by
    libxml2mod.xmlCreateInputBuffer(); the buffer is kept in self._o."""
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as an explicit close(); close() is safe to repeat.
        self.close()

    def close(self):
        """Close the wrapped object and free the input buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
79
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to an output buffer created by
    libxml2mod.xmlCreateOutputBuffer(); the buffer is kept in self._o."""
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)

    def __del__(self):
        # Same teardown as an explicit close(); close() is safe to repeat.
        self.close()

    def close(self):
        """Close the wrapped object and close the output buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
97
98#
99# Example of a class to handle SAX events
100#
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op, except warning() which prints the message
    and error()/fatalError() which raise parserError.
    """
    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second definition of entityDecl, which
    # silently replaced the five-argument version above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """print the warning message"""
        print(msg)

    def error(self, msg):
        """raise parserError with the given message"""
        raise parserError(msg)

    def fatalError(self, msg):
        """raise parserError with the given message"""
        raise parserError(msg)
198
#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and handles
# the exceptions gracefully), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
#
class xmlCore:
    """Common ancestor of the node wrapper classes.

    An instance wraps a low-level libxml2mod object, kept in self._o, and
    offers tree navigation, content access, serialization, XPath
    evaluation and iteration on top of it.
    """
    def __init__(self, _obj=None):
        # _o is the wrapped low-level object, or None when unbound.
        self._o = _obj

    #
    # Navigation accessors: each one wraps the low-level result in the
    # matching Python class, or returns None when there is no such node.
    #
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)
    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?
    def get_name(self):
        return libxml2mod.name(self._o)
    def get_type(self):
        return libxml2mod.type(self._o)
    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret is None:
            # No owner document reported: a document node is its own doc.
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)
    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    # Compare the version tuple rather than parsing a slice of sys.version
    # as a float, which cannot represent a two-digit minor version.
    if sys.version_info < (2, 2):
        # No property() builtin: route attribute reads to the get_* methods.
        def __getattr__(self, attr):
            if attr in ("parent", "properties", "children", "last", "next",
                        "prev", "content", "name", "type", "doc"):
                return getattr(self, "get_" + attr)()
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properties")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)
    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate `expr` with this node as context node.

        Returns None when the node has no document, otherwise the result
        of the context's xpathEval().
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Release the temporary context even if the evaluation raised.
            ctxt.xpathFreeContext()

    #
    # Selecting nodes using XPath, reusing the context stored in the
    # _ctxt attribute of the document object when there is one.
    #
    def xpathEval2(self, expr):
        """Like xpathEval(), but keeps the context on the document object
        (doc._ctxt) instead of freeing it after the evaluation."""
        doc = self.doc
        if doc is None:
            return None
        try:
            doc._ctxt.setContextNode(self)
        except:
            # No usable cached context on this document object: create one.
            doc._ctxt = doc.xpathNewContext()
            doc._ctxt.setContextNode(self)
        res = doc._ctxt.xpathEval(expr)
        return res

    # support for python2 iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)
    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        """Free the XPath context cached on the document, if any, then
        release the wrapped object with libxml2mod.freeDoc()."""
        try:
            self.doc._ctxt.xpathFreeContext()
        except:
            # Deliberately best effort: there may be no cached context.
            pass
        libxml2mod.freeDoc(self._o)
374
375
376#
377# implements the depth-first iterator for libxml2 DOM tree
378#
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a tree of node objects.

    A node is returned before its children; the walk follows the
    `children` and `next` attributes and starts at `node`, so any
    following siblings of the start node are visited as well.
    """
    def __init__(self, node):
        self.node = node      # next node to hand out, if any
        self.parents = []     # nodes whose `next` sibling is still to visit
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next
    # Python 3 spelling of the iterator protocol.
    __next__ = next
397
398#
399# implements the breadth-first iterator for libxml2 DOM tree
400#
class xmlCoreBreadthFirstItertor:
    """Breadth-first iterator over a tree of node objects.

    A run of siblings (linked by `next`) is returned first, then the
    children of each of those nodes in the same order, level by level.
    """
    def __init__(self, node):
        self.node = node      # next node to hand out, if any
        self.parents = []     # FIFO of nodes whose children are pending
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                # Take the oldest pending node: popping the newest one
                # would descend before the current level is finished.
                parent = self.parents.pop(0)
            except IndexError:
                raise StopIteration
            self.node = parent.children
    # Python 3 spelling of the iterator protocol.
    __next__ = next
419
420#
421# converters to present a nicer view of the XPath returns
422#
def nodeWrap(o):
    """Wrap the low-level node `o` in the Python class matching its type.

    Falls back to xmlNode for any type not listed below.
    """
    # TODO try to cast to the most appropriate node class
    # Dispatch on the node *type* string; the node name is arbitrary
    # (an element may well be called "attribute" or "dtd").
    kind = libxml2mod.type(o)
    if kind == "element" or kind == "text":
        return xmlNode(_obj=o)
    if kind == "attribute":
        return xmlAttr(_obj=o)
    if kind[0:8] == "document":
        return xmlDoc(_obj=o)
    # "namespace" is nine characters long, so the prefix slice must be too.
    if kind[0:9] == "namespace":
        return xmlNs(_obj=o)
    if kind == "elem_decl":
        return xmlElement(_obj=o)
    if kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    if kind == "entity_decl":
        return xmlEntity(_obj=o)
    if kind == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
443
def xpathObjectRet(o):
    """Present an XPath result in a nicer form.

    A list or tuple is turned into a list whose items went through
    nodeWrap(); any other value is returned unchanged.
    """
    if type(o) == type([]) or type(o) == type(()):
        # Build the list eagerly: map() is lazy on Python 3.
        return [nodeWrap(item) for item in o]
    return o
449
450#
451# register an XPath function
452#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python function `f` as XPath function `name` in the
    namespace `ns_uri` for the context `ctxt`.

    Returns the value given back by libxml2mod.xmlRegisterXPathFunction()
    (it used to be computed and then dropped).
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
455
456#
457# For the xmlTextReader parser configuration
458#
# Numbered consecutively from 1, in this order.
(PARSER_LOADDTD,
 PARSER_DEFAULTATTRS,
 PARSER_VALIDATE,
 PARSER_SUBST_ENTITIES) = range(1, 5)
463
464#
465# For the error callback severities
466#
# Numbered consecutively from 1, in this order.
(PARSER_SEVERITY_VALIDITY_WARNING,
 PARSER_SEVERITY_VALIDITY_ERROR,
 PARSER_SEVERITY_WARNING,
 PARSER_SEVERITY_ERROR) = range(1, 5)
471
472#
473# register the libxml2 error handler
474#
def registerErrorHandler(f, ctx):
    """Register a function written in Python for error reporting.
       The function is called back as f(ctx, error).

       Returns the value given back by the underlying registration call."""
    import sys
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        ret = libxml2mod.xmlRegisterErrorHandler(f, ctx)
    else:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        ret = libxslt.registerErrorHandler(f, ctx)
    return ret
488
class parserCtxtCore:
    """Wrapper around a low-level parser context kept in self._o; the
    context is freed when the wrapper is deleted."""

    def __init__(self, _obj=None):
        self._o = None
        if _obj is not None:
            self._o = _obj

    def __del__(self):
        handle = self._o
        if handle is not None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        handler = libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
        return handler

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        status = libxml2mod.addLocalCatalog(self._o, uri)
        return status
517
518
def _xmlTextReaderErrorFunc(handler, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    `handler` is the (f, arg) pair; f is called as
    f(arg, msg, severity, xmlTextReaderLocator(locator)) and its result
    is returned.
    """
    # Unpack in the body: tuple parameters in a signature are Python 2 only.
    f, arg = handler
    return f(arg, msg, severity, xmlTextReaderLocator(locator))
522
class xmlTextReaderCore:
    """Wrapper around a low-level text reader kept in self._o; the reader
    is freed when the wrapper is deleted."""

    def __init__(self, _obj=None):
        self.input = None
        self._o = None
        if _obj is not None:
            self._o = _obj

    def __del__(self):
        reader = self._o
        if reader is not None:
            libxml2mod.xmlFreeTextReader(reader)
        self._o = None

    def SetErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            callback, payload = None, None
        else:
            # Go through the intermediate callback and hand it the
            # (f, arg) pair as its argument.
            callback, payload = _xmlTextReaderErrorFunc, (f, arg)
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, callback, payload)

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f, arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None, None
        # assert f is _xmlTextReaderErrorFunc
        # arg is the (f, arg) pair stored by SetErrorHandler.
        return arg
554
555# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
556#
557# Everything before this line comes from libxml.py
558# Everything after this line is automatically generated
559#
560# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
561
562