libxml.py revision 87ab1c129868978f1806da36496c8c519852c6ad
1import libxml2mod
2import types
3
4#
5# Errors raised by the wrappers when some tree handling failed.
6#
class treeError(Exception):
    """Error raised by the wrappers when some tree handling failed.

    Derives from Exception so that it can be raised on every Python
    version and is caught by generic ``except Exception`` handlers.
    The message is available both as ``str(err)`` and as ``err.msg``.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # Kept as a plain attribute for callers reading ``err.msg``.
        self.msg = msg

    def __str__(self):
        return self.msg
12
class parserError(Exception):
    """Error carrying a parser message.

    Derives from Exception so that it can be raised on every Python
    version and is caught by generic ``except Exception`` handlers.
    The message is available both as ``str(err)`` and as ``err.msg``.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # Kept as a plain attribute for callers reading ``err.msg``.
        self.msg = msg

    def __str__(self):
        return self.msg
18
class uriError(Exception):
    """Error carrying a message string (presumably for URI handling
    failures, going by the name; it is not raised in this part of the file).

    Derives from Exception so that it can be raised on every Python
    version and is caught by generic ``except Exception`` handlers.
    The message is available both as ``str(err)`` and as ``err.msg``.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # Kept as a plain attribute for callers reading ``err.msg``.
        self.msg = msg

    def __str__(self):
        return self.msg
24
class xpathError(Exception):
    """Error carrying a message string (presumably for XPath failures,
    going by the name; it is not raised in this part of the file).

    Derives from Exception so that it can be raised on every Python
    version and is caught by generic ``except Exception`` handlers.
    The message is available both as ``str(err)`` and as ``err.msg``.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # Kept as a plain attribute for callers reading ``err.msg``.
        self.msg = msg

    def __str__(self):
        return self.msg
30
class ioWrapper:
    """Adapter exposing io_close/io_flush/io_read/io_write on top of a
    Python file-like object.

    Every io_* method returns -1 once the wrapped object has been
    released by io_close() (or when None was wrapped to begin with).
    """

    def __init__(self, _obj):
        # Wrapped Python object; private (name-mangled) on purpose.
        self.__io = _obj
        # Slot for the associated libxml2 buffer, filled in by subclasses.
        self._o = None

    def io_close(self):
        """Close and release the wrapped object; 0 on success, -1 if
        there is nothing to close."""
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object; 0 on success, -1 if released."""
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read up to `len` items from the wrapped object, or everything
        when `len` is negative. Returns -1 if released."""
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len = -1):
        """Write `str` to the wrapped object, limited to its first `len`
        items when `len` is non-negative. Returns whatever the wrapped
        write() returns, or -1 if released."""
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # File-like write() accepts a single argument, so the length limit
        # has to be applied by slicing rather than passed along.
        return self.__io.write(str[:len])
62
class ioReadWrapper(ioWrapper):
    """ioWrapper bound to a libxml2 parser input buffer.

    The input buffer is created from this wrapper with the requested
    encoding `enc` and stored in ``self._o``.
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def _release(self):
        # Shared teardown: close the Python object, then free the libxml2
        # input buffer exactly once (self._o is cleared afterwards).
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def __del__(self):
        self._release()

    def close(self):
        """Close the wrapped object and free the libxml2 input buffer.
        Safe to call more than once."""
        self._release()
80
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to a libxml2 output buffer (``self._o``).

    Depending on the type of `_obj` the constructor:
      - string: wraps nothing and creates no output buffer;
      - old-style class instance: wraps it and creates a new libxml2
        output buffer writing to this wrapper with encoding `enc`;
      - anything else: uses `_obj` itself as the output buffer
        (presumably an existing libxml2 output buffer -- confirm against
        callers) and wraps the Python file libxml2mod associates with
        it, or `_obj` when there is none.
    """

    def __init__(self, _obj, enc = ""):
        if type(_obj) == type(''):
            # Nothing to write to; still run the base initialiser so that
            # the wrapped object and self._o exist and __del__/flush/close
            # work on this instance.
            ioWrapper.__init__(self, None)
        elif type(_obj) == types.InstanceType:
            ioWrapper.__init__(self, _obj)
            self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
        else:
            pyfile = libxml2mod.outputBufferGetPythonFile(_obj)
            if pyfile is not None:
                ioWrapper.__init__(self, pyfile)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def _close_buffer(self):
        # Close the libxml2 output buffer at most once.
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def __del__(self):
        self.io_close()
        self._close_buffer()

    def flush(self):
        """Flush the wrapped object and close the libxml2 output buffer."""
        # NOTE(review): this closes the output buffer, exactly like close();
        # kept as-is for backward compatibility -- confirm whether a
        # non-closing flush was intended.
        self.io_flush()
        self._close_buffer()

    def close(self):
        """Flush the wrapped object and close the libxml2 output buffer.

        The wrapped Python object itself is only closed by __del__."""
        self.io_flush()
        self._close_buffer()
117
118#
119# Example of a class to handle SAX events
120#
class SAXCallback:
    """Base class for SAX handlers.

    Every content and declaration callback is a no-op meant to be
    overridden; warning() prints the message, while error() and
    fatalError() raise parserError.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callbacks
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when a CDATA section has been read, data is the string
           containing the data, multiple consecutive cdataBlock() callbacks
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second definition of entityDecl(), which
    # silently replaced the five-argument version above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """called for parser warnings; prints the message"""
        print(msg)

    def error(self, msg):
        """called for parser errors; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called for fatal parser errors; raises parserError(msg)"""
        raise parserError(msg)
218
219#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and handles
# exceptions gracefully), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
224#
class xmlCore:
    """Ancestor of the node wrapper classes.

    Holds the underlying libxml2 object in ``self._o`` and exposes tree
    navigation, name/type/content access, serialization, XPath evaluation
    and iteration on top of the libxml2mod accessors.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    #
    # Accessors: each one returns None when libxml2mod reports no such
    # link, otherwise the result wrapped in the matching Python class.
    #
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # alias of get_content kept for existing callers

    def get_name(self):
        return libxml2mod.name(self._o)

    def get_type(self):
        return libxml2mod.type(self._o)

    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret is not None:
            return xmlDoc(_obj=ret)
        # No owning document reported: a document node is its own document.
        if self.type in ("document_xml", "document_html"):
            return xmlDoc(_obj=self._o)
        return None

    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    # Compare the version tuple: parsing sys.version[0:3] as a float
    # misreads two-digit minor versions ("3.10" would become 3.1).
    if sys.version_info < (2, 2):
        # No property() builtin there: emulate the attributes by routing
        # lookups to the accessors defined above.
        _getters = {
            "parent": get_parent,
            "properties": get_properties,
            "children": get_children,
            "last": get_last,
            "next": get_next,
            "prev": get_prev,
            "content": get_content,
            "name": get_name,
            "type": get_type,
            "doc": get_doc,
        }

        def __getattr__(self, attr):
            try:
                getter = self._getters[attr]
            except KeyError:
                raise AttributeError(attr)
            return getter(self)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last child node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properties")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate the XPath expression `expr` with this node as the
        context node. Returns None when the node has no document."""
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Free the context even when the evaluation raises.
            ctxt.xpathFreeContext()

    # A variant caching one XPath context per document was removed because
    # of memory leaks (c.f. #126735); the name is kept for compatibility.
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # support for python2 iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        """Free the underlying object with libxml2mod.xmlFreeDoc."""
        try:
            # Best effort: release an XPath context cached on the document,
            # if there is one.
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            pass
        libxml2mod.xmlFreeDoc(self._o)
398
399
400#
401# implements the depth-first iterator for libxml2 DOM tree
402#
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a tree of nodes.

    Only the ``children`` and ``next`` attributes of the nodes are used.
    Each node is yielded before its children; once a subtree is exhausted
    the walk resumes at the ``next`` sibling of the most recently stacked
    node.
    """

    def __init__(self, node):
        self.node = node
        # Stack of already-yielded nodes whose next sibling is still to visit.
        self.parents = []

    def __iter__(self):
        return self

    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next

    # Python 3 spelling of the iterator protocol.
    __next__ = next
421
422#
423# implements the breadth-first iterator for libxml2 DOM tree
424#
class xmlCoreBreadthFirstItertor:
    """Breadth-first iterator over a tree of nodes.

    Only the ``children`` and ``next`` attributes of the nodes are used.
    A node and its following siblings are yielded first; their children
    are then visited in the order the parents were yielded.
    """

    def __init__(self, node):
        self.node = node
        # FIFO queue of already-yielded nodes whose children are pending.
        self.parents = []

    def __iter__(self):
        return self

    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                # Take the oldest pending node: popping the newest one would
                # dive into the last sibling's subtree before the children
                # of earlier siblings, which is not breadth-first.
                parent = self.parents.pop(0)
            except IndexError:
                raise StopIteration
            self.node = parent.children

    # Python 3 spelling of the iterator protocol.
    __next__ = next
443
444#
445# converters to present a nicer view of the XPath returns
446#
def nodeWrap(o):
    """Wrap the raw libxml2 object `o` in a Python class chosen from
    its node type; unrecognised types fall back to xmlNode."""
    # TODO try to cast to the most appropriate node class
    # Dispatch on the node *type* string (the values get_doc() compares
    # against), not on the node name, which is e.g. an element's tag.
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        return xmlNode(_obj=o)
    if kind == "attribute":
        return xmlAttr(_obj=o)
    if kind.startswith("document"):
        return xmlDoc(_obj=o)
    if kind.startswith("namespace"):
        return xmlNs(_obj=o)
    if kind == "elem_decl":
        return xmlElement(_obj=o)
    if kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    if kind == "entity_decl":
        return xmlEntity(_obj=o)
    if kind == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
467
def xpathObjectRet(o):
    """Present a nicer view of an XPath result.

    A list or tuple is returned as a new list with every item passed
    through nodeWrap(); any other value is returned unchanged.
    """
    if isinstance(o, (list, tuple)):
        # Build a real list: map() is lazy on Python 3.
        return [nodeWrap(item) for item in o]
    return o
473
474#
475# register an XPath function
476#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable `f` as the XPath function `name`
    in namespace `ns_uri` on the context `ctxt`.

    Returns the result of libxml2mod.xmlRegisterXPathFunction (it used
    to be discarded)."""
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
479
#
# xmlTextReader parser configuration: property identifiers
#
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
PARSER_SUBST_ENTITIES = 4
487
#
# Severity values passed to the error callbacks
#
PARSER_SEVERITY_VALIDITY_WARNING = 1
PARSER_SEVERITY_VALIDITY_ERROR = 2
PARSER_SEVERITY_WARNING = 3
PARSER_SEVERITY_ERROR = 4
495
496#
497# register the libxml2 error handler
498#
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error).

       Returns the result of the underlying registration call."""
    import sys
    if 'libxslt' in sys.modules:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        return libxslt.registerErrorHandler(f, ctx)
    # normal behaviour when libxslt is not imported
    return libxml2mod.xmlRegisterErrorHandler(f, ctx)
512
class parserCtxtCore:
    """Hand-written core of the parser context wrapper.

    Holds the underlying libxml2 parser context in ``self._o`` and frees
    it with libxml2mod.xmlFreeParserCtxt when the wrapper is collected.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeParserCtxt(self._o)
        self._o = None

    def setErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)
541
542
def _xmlTextReaderErrorFunc(f_arg, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    `f_arg` is the (f, arg) pair registered by
    xmlTextReaderCore.SetErrorHandler; f is called back as
    f(arg, msg, severity, xmlTextReaderLocator(locator))."""
    # Unpacked here because tuple parameters in a signature are not
    # valid syntax on Python 3.
    f, arg = f_arg
    return f(arg, msg, severity, xmlTextReaderLocator(locator))
546
class xmlTextReaderCore:
    """Hand-written core of the xmlTextReader wrapper.

    Holds the underlying libxml2 reader in ``self._o`` and frees it with
    libxml2mod.xmlFreeTextReader when the wrapper is collected.
    """

    def __init__(self, _obj=None):
        # Initialised to None here; nothing in this class assigns it later.
        self.input = None
        self._o = _obj

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeTextReader(self._o)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator). Passing None as f removes it."""
        if f is None:
            libxml2mod.xmlTextReaderSetErrorHandler(self._o, None, None)
        else:
            # Register the intermediate callback, which wraps the locator
            # before handing over to f; (f, arg) travels as its argument.
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o, _xmlTextReaderErrorFunc, (f, arg))

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f, arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None, None
        # f is the intermediate callback here; the user's (f, arg) pair
        # is what was stored as its argument.
        return arg
578
579
580# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
581#
582# Everything before this line comes from libxml.py
583# Everything after this line is automatically generated
584#
585# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
586
587