1#
2# ElementTree
3# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4#
5# light-weight XML support for Python 2.3 and later.
6#
7# history (since 1.2.6):
8# 2005-11-12 fl   added tostringlist/fromstringlist helpers
9# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
10# 2006-07-05 fl   removed support for 2.1 and earlier
11# 2007-06-21 fl   added deprecation/future warnings
12# 2007-08-25 fl   added doctype hook, added parser version attribute etc
13# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
14# 2007-08-27 fl   warn for broken /tag searches on tree level
15# 2007-09-02 fl   added html/text methods to serializer (experimental)
16# 2007-09-05 fl   added method argument to tostring/tostringlist
17# 2007-09-06 fl   improved error handling
18# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
19# 2007-12-15 fl   added C14N hooks, copy method (experimental)
20#
21# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
22#
23# fredrik@pythonware.com
24# http://www.pythonware.com
25#
26# --------------------------------------------------------------------
27# The ElementTree toolkit is
28#
29# Copyright (c) 1999-2008 by Fredrik Lundh
30#
31# By obtaining, using, and/or copying this software and/or its
32# associated documentation, you agree that you have read, understood,
33# and will comply with the following terms and conditions:
34#
35# Permission to use, copy, modify, and distribute this software and
36# its associated documentation for any purpose and without fee is
37# hereby granted, provided that the above copyright notice appears in
38# all copies, and that both that copyright notice and this permission
39# notice appear in supporting documentation, and that the name of
40# Secret Labs AB or the author not be used in advertising or publicity
41# pertaining to distribution of the software without specific, written
42# prior permission.
43#
44# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
45# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
46# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
47# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
48# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
49# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
50# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
51# OF THIS SOFTWARE.
52# --------------------------------------------------------------------
53
54# Licensed to PSF under a Contributor Agreement.
55# See http://www.python.org/psf/license for licensing details.
56
# Names exported by a star-import of this module.  Only some of them are
# defined in the part of the file that directly follows; the others are
# expected to be defined further down.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser", "XMLTreeBuilder",
    ]

# Version string of this ElementTree implementation (exported above).
VERSION = "1.3.0"
76
77##
78# The <b>Element</b> type is a flexible container object, designed to
79# store hierarchical data structures in memory. The type can be
80# described as a cross between a list and a dictionary.
81# <p>
82# Each element has a number of properties associated with it:
83# <ul>
84# <li>a <i>tag</i>. This is a string identifying what kind of data
85# this element represents (the element type, in other words).</li>
86# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
87# <li>a <i>text</i> string.</li>
88# <li>an optional <i>tail</i> string.</li>
89# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
90# </ul>
91#
92# To create an element instance, use the {@link #Element} constructor
93# or the {@link #SubElement} factory function.
94# <p>
95# The {@link #ElementTree} class can be used to wrap an element
96# structure, and convert it from and to XML.
97##
98
99import sys
100import re
101import warnings
102
103
104class _SimpleElementPath(object):
105    # emulate pre-1.2 find/findtext/findall behaviour
106    def find(self, element, tag, namespaces=None):
107        for elem in element:
108            if elem.tag == tag:
109                return elem
110        return None
111    def findtext(self, element, tag, default=None, namespaces=None):
112        elem = self.find(element, tag)
113        if elem is None:
114            return default
115        return elem.text or ""
116    def iterfind(self, element, tag, namespaces=None):
117        if tag[:3] == ".//":
118            for elem in element.iter(tag[3:]):
119                yield elem
120        for elem in element:
121            if elem.tag == tag:
122                yield elem
123    def findall(self, element, tag, namespaces=None):
124        return list(self.iterfind(element, tag, namespaces))
125
# Use the ElementPath module from the enclosing package when it can be
# imported; otherwise bind the name to a _SimpleElementPath instance,
# which offers the same find/findtext/iterfind/findall entry points.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
130
##
# Parser error.  This is a subclass of <b>SyntaxError</b>.
# <p>
# Besides the exception value, an instance is expected to carry a
# specific exception code in its <b>code</b> attribute, and the line
# and column of the error in its <b>position</b> attribute.  The class
# itself adds nothing to SyntaxError; those attributes are attached by
# whoever raises the exception.

class ParseError(SyntaxError):
    """Error reported when an XML document cannot be parsed."""
140
141# --------------------------------------------------------------------
142
##
# Checks whether an object appears to be a valid element object.  Any
# Element instance qualifies, and so does any other object that has a
# <b>tag</b> attribute.
#
# @param element The object to check.
# @return A true value if this is an element object.
# @defreturn flag

def iselement(element):
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154
##
# Element class.  Defines the Element interface and doubles as the
# reference implementation of that interface.
# <p>
# Element names, attribute names and attribute values may be either
# ASCII strings (ordinary Python strings holding nothing but 7-bit
# ASCII characters) or Unicode strings.
#
# @param tag The element name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @see Element
# @see SubElement
# @see Comment
# @see ProcessingInstruction

class Element(object):
    # layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary.  Whenever possible, go
    # through {@link #Element.get}, {@link #Element.set},
    # {@link #Element.keys} and {@link #Element.items} rather than
    # touching this dictionary directly.

    attrib = None

    ##
    # (Attribute) Text found before the first subelement; either a
    # string or None.  When there is no such text, the value may be
    # None or an empty string, depending on the parser.

    text = None

    ##
    # (Attribute) Text found after this element's end tag and before
    # the start tag of the next sibling; either a string or None.  When
    # there is no such text, the value may be None or an empty string,
    # depending on the parser.

    tail = None # text after end tag, if any

    # constructor

    def __init__(self, tag, attrib={}, **extra):
        # work on a private copy, so that neither the caller's
        # dictionary nor the shared default is ever modified
        attributes = attrib.copy()
        attributes.update(extra)
        self.tag = tag
        self.attrib = attributes
        self._children = []

    def __repr__(self):
        return "<Element %r at 0x%x>" % (self.tag, id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Copies the current element.  The copy is shallow:
    # the subelements are shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        clone = self.makeelement(self.tag, self.attrib)
        clone.text, clone.tail = self.text, self.tail
        clone[:] = self
        return clone

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; to find out whether an element has any content at all,
    # check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        children = self._children
        return len(children)

    def __nonzero__(self):
        message = (
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead."
            )
        warnings.warn(message, FutureWarning, stacklevel=2)
        # emulate old behaviour, for now
        return bool(len(self._children))

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        children = self._children
        return children[index]

    ##
    # Replaces the given subelement, by index.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        # the new value (or values, for a slice) is stored as given;
        # nothing verifies that it really is an element
        children = self._children
        children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        children = self._children
        del children[index]

    ##
    # Adds a subelement to the end of this element.  In document order
    # the new element follows the last existing subelement (or comes
    # right after the text, if it is the first subelement), and
    # precedes the end tag of this element.
    #
    # @param element The element to add.

    def append(self, element):
        # stored as given; no element check is made
        children = self._children
        children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        # stored as given; no element check is made
        children = self._children
        children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        # stored as given; no element check is made
        children = self._children
        children.insert(index, element)

    ##
    # Removes a matching subelement.  In contrast to the <b>find</b>
    # methods, elements are compared by identity here, not by tag value
    # or contents.  To remove subelements by other criteria, it is
    # usually easiest to build the list of elements to keep with a list
    # comprehension, and update the parent with a slice assignment.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        children = self._children
        children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        message = (
            "This method will be removed in future versions.  "
            "Use 'list(elem)' or iteration over elem instead."
            )
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        finder = ElementPath.find
        return finder(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        finder = ElementPath.findtext
        return finder(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        finder = ElementPath.findall
        return finder(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        finder = ElementPath.iterfind
        return finder(self, path, namespaces)

    ##
    # Resets an element.  All subelements are dropped, all attributes
    # are cleared, and the <b>text</b> and <b>tail</b> attributes are
    # set to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        attributes = self.attrib
        return attributes.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        attributes = self.attrib
        attributes[key] = value

    ##
    # Gets the attribute names, in arbitrary order (just like for an
    # ordinary Python dictionary).  Equivalent to <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        attributes = self.attrib
        return attributes.keys()

    ##
    # Gets the element attributes as a sequence, in arbitrary order.
    # Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        attributes = self.attrib
        return attributes.items()

    ##
    # Creates a tree iterator.  The iterator visits this element and
    # all subelements, in document order, and returns every element
    # that has a matching tag.
    # <p>
    # If the tree structure is modified while iterating, new or removed
    # elements may or may not be included.  To get a stable set, pass
    # the iterator to list() and loop over the resulting list.
    #
    # @param tag What tags to look for (default is to return all elements).
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        # "*" is a wildcard and means the same as no tag filter at all
        if tag == "*":
            wanted = None
        else:
            wanted = tag
        if wanted is None or self.tag == wanted:
            yield self
        for child in self._children:
            for match in child.iter(wanted):
                yield match

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        message = (
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead."
            )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return list(self.iter(tag))

    ##
    # Creates a text iterator.  The iterator visits this element and
    # all subelements, in document order, and returns all inner text.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        # nodes whose tag is neither a string nor None contribute no text
        if not (isinstance(tag, basestring) or tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for chunk in child.itertext():
                yield chunk
            if child.tail:
                yield child.tail
509
# compatibility: older private names, kept as aliases of Element
_Element = Element
_ElementInterface = Element
512
##
# Subelement factory.  Creates an element instance through the parent's
# own <b>makeelement</b> method, and appends it to that parent.
# <p>
# The element name, attribute names, and attribute values can be
# either ASCII strings or Unicode strings.
#
# @param parent The parent element.
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def SubElement(parent, tag, attrib={}, **extra):
    # merge into a private copy; the dictionary passed in (or the
    # shared default) is left untouched
    attributes = attrib.copy()
    attributes.update(extra)
    child = parent.makeelement(tag, attributes)
    parent.append(child)
    return child
533
##
# Comment element factory.  Creates a special element, tagged with this
# factory function itself, that the standard serializer writes out as
# an XML comment.
# <p>
# The comment string can be either an ASCII string or a Unicode string.
#
# @param text A string containing the comment string.
# @return An element instance, representing a comment.
# @defreturn Element

def Comment(text=None):
    # the function object doubles as the tag that marks comment nodes
    comment = Element(Comment)
    comment.text = text
    return comment
550
##
# PI element factory.  Creates a special element, tagged with this
# factory function itself, that the standard serializer writes out as
# an XML processing instruction.
#
# @param target A string containing the PI target.
# @param text A string containing the PI contents, if any.
# @return An element instance, representing a PI.
# @defreturn Element

def ProcessingInstruction(target, text=None):
    # the function object doubles as the tag that marks PI nodes
    instruction = Element(ProcessingInstruction)
    if text:
        # target and contents are stored together, separated by a space
        instruction.text = target + " " + text
    else:
        instruction.text = target
    return instruction

# short alias for the factory above
PI = ProcessingInstruction
569
##
# QName wrapper.  This can be used to wrap a QName attribute value, in
# order to get proper namespace handling on output.
# <p>
# A QName hashes like its text and compares by its text, both against
# other QName objects and against plain strings, so the two can be used
# interchangeably as dictionary keys.
#
# @param text A string containing the QName value, in the form {uri}local,
#     or, if the tag argument is given, the URI part of a QName.
# @param tag Optional tag.  If given, the first argument is interpreted as
#     an URI, and this argument is interpreted as a local name.
# @return An opaque object, representing the QName.

class QName(object):
    def __init__(self, text_or_uri, tag=None):
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def _comparable(self, other):
        # unwrap other QName objects; anything else is compared as is
        if isinstance(other, QName):
            return other.text
        return other

    # Rich comparisons.  These do not depend on __cmp__/cmp, so equality
    # stays consistent with __hash__ on interpreters that never call
    # __cmp__.

    def __eq__(self, other):
        return self.text == self._comparable(other)

    def __ne__(self, other):
        return self.text != self._comparable(other)

    def __lt__(self, other):
        return self.text < self._comparable(other)

    def __le__(self, other):
        return self.text <= self._comparable(other)

    def __gt__(self, other):
        return self.text > self._comparable(other)

    def __ge__(self, other):
        return self.text >= self._comparable(other)

    def __cmp__(self, other):
        # kept for callers that rely on three-way comparison; written
        # without the cmp() builtin so that it works everywhere
        other = self._comparable(other)
        return (self.text > other) - (self.text < other)
593
594# --------------------------------------------------------------------
595
##
# ElementTree wrapper class.  This class represents an entire element
# hierarchy, and adds some extra support for serialization to and from
# standard XML.
#
# @param element Optional root element.
# @keyparam file Optional file handle or file name.  If given, the
#     tree is initialized with the contents of this XML file.

class ElementTree(object):

    def __init__(self, element=None, file=None):
        # the root is stored as given; it is not checked to be an element
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        # stored as given; no element check is made
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    # <p>
    # If a file name is given, the file is opened here and is always
    # closed again before this method returns or raises.  A file object
    # supplied by the caller is left open.
    #
    # @param source A file name or file object.  If a file object is
    #     given, it only has to implement a <b>read(n)</b> method.
    # @keyparam parser An optional parser instance.  If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            # feed the document to the parser in 64k chunks
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # only close what was opened here
            if close_source:
                source.close()

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        # the tree must have a root element at this point
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    # Shared by the find methods below.  A path with a leading "/" is
    # rewritten to start with "./" instead, and a FutureWarning is
    # issued; all other paths are returned unchanged.  The stack level
    # makes the warning point at the caller of the find method, not at
    # the find method or at this helper.

    def _fix_rooted_path(self, path):
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version.  If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        # the tree must have a root element at this point
        path = self._fix_rooted_path(path)
        return self._root.find(path, namespaces)

    ##
    # Finds the element text for the first toplevel element with given
    # tag.  Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        # the tree must have a root element at this point
        path = self._fix_rooted_path(path)
        return self._root.findtext(path, default, namespaces)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        # the tree must have a root element at this point
        path = self._fix_rooted_path(path)
        return self._root.findall(path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Same as getroot().iterfind(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        # the tree must have a root element at this point
        path = self._fix_rooted_path(path)
        return self._root.iterfind(path, namespaces)

    ##
    # Writes the element tree to a file, as XML.
    # <p>
    # If a file name is given, the file is opened here and is always
    # closed again, even if serialization fails.  A file object supplied
    # by the caller is left open.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding (default is US-ASCII,
    #     or UTF-8 for the "c14n" method).
    # @keyparam method Optional output method ("xml", "html", "text" or
    #     "c14n"; default is "xml").
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file.  Use False for never, True for always,
    #     None for only if not US-ASCII or UTF-8.  None is default.
    #     A declaration is only ever written by the "xml" method.
    # @keyparam default_namespace Optional namespace URI; passed on to
    #     the namespace collection step as the default namespace.
    # @exception ValueError If the output method is not known.

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        # the tree must have a root element at this point
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        if hasattr(file_or_filename, "write"):
            file = file_or_filename
        else:
            file = open(file_or_filename, "wb")
        try:
            write = file.write
            # decided after the encoding default has been filled in, so
            # that an explicit xml_declaration=True is honoured even
            # when no encoding was given
            if method == "xml" and (
                xml_declaration or (
                    xml_declaration is None and
                    encoding not in ("utf-8", "us-ascii"))):
                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # only close what was opened here
            if file_or_filename is not file:
                file.close()

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
822
823# --------------------------------------------------------------------
824# serialization support
825
def _namespaces(elem, encoding, default_namespace=None):
    # Identifies the namespaces used in the tree rooted at elem.
    # Returns a (qnames, namespaces) pair, as described below.

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes; the default namespace, if any, is
    # registered up front under the empty prefix
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            is_qualified = qname[:1] == "{"
            if not is_qualified:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # not seen before: use the registered prefix for this
                # uri, or else generate a fresh "nsN" one
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                # the "xml" prefix is never added to the table
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                serialized = "%s:%s" % (prefix, local)
            else:
                serialized = local # default element
            qnames[qname] = encode(serialized)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            tag_name = tag.text
        elif isinstance(tag, basestring):
            tag_name = tag
        else:
            # None, comments and processing instructions have no
            # qname; any other kind of tag cannot be serialized
            tag_name = None
            if not (tag is None or tag is Comment or tag is PI):
                _raise_serialization_error(tag)
        if tag_name is not None and tag_name not in qnames:
            add_qname(tag_name)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            # attribute values only matter when wrapped in a QName
            if isinstance(value, QName):
                if value.text not in qnames:
                    add_qname(value.text)
        text = node.text
        # the same goes for the element text
        if isinstance(text, QName):
            if text.text not in qnames:
                add_qname(text.text)
    return qnames, namespaces
893
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* and its subtree as XML through *write*.

    qnames maps tags and attribute names to their encoded serialized
    form.  namespaces, when non-empty, is a uri-to-prefix mapping that
    is emitted as xmlns declarations on this element; child elements
    are serialized with namespaces set to None.  The element's tail,
    if any, is written after the element itself.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda item: item[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                write(" />")
        else:
            # a tag that maps to None produces no markup of its own;
            # only its text and children are written
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail, encoding))
940
# Lower-case names of the HTML elements for which the HTML serializer
# does not write a closing tag.
# NOTE: every name needs its own comma; adjacent string literals are
# silently concatenated ("meta" "param" used to become "metaparam").
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# use a set for membership tests where the builtin exists; otherwise
# keep the tuple
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
948
def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* and its subtree as HTML through *write*.

    Same contract as the XML serializer, with these differences visible
    below: the start tag is always closed with ">", the text of script
    and style elements is written without escaping, the closing tag is
    written in lower case, and no closing tag is written for elements
    listed in HTML_EMPTY.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda item: item[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value, encoding)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    # raw text elements: encode only, no escaping
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + lowered + ">")
        else:
            # a tag that maps to None produces no markup of its own;
            # only its text and children are written
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail, encoding))
998
def _serialize_text(write, elem, encoding):
    """Write the text content of *elem* through *write*.

    Every fragment produced by elem.itertext() is encoded and written
    in order, followed by the element's own tail when it has one.
    """
    for fragment in elem.itertext():
        write(fragment.encode(encoding))
    trailing = elem.tail
    if trailing:
        write(trailing.encode(encoding))
1004
# dispatch table: output method name -> serializer function
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
# this optional method is imported at the end of the module
#   c14n=_serialize_c14n,
)
1012
1013##
1014# Registers a namespace prefix.  The registry is global, and any
1015# existing mapping for either the given prefix or the namespace URI
1016# will be removed.
1017#
1018# @param prefix Namespace prefix.
1019# @param uri Namespace uri.  Tags and attributes in this namespace
1020#     will be serialized with the given prefix, if at all possible.
1021# @exception ValueError If the prefix is reserved, or is otherwise
1022#     invalid.
1023
def register_namespace(prefix, uri):
    """Register *prefix* as the serialization prefix for namespace *uri*.

    The registry (_namespace_map) is global.  Any existing entry that
    uses either the given uri or the given prefix is removed before the
    new mapping is stored.

    Raises ValueError if the prefix has the form "ns<digits>", which is
    reserved for prefixes generated by the serializer.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # iterate over a snapshot: entries are deleted during the scan, and
    # deleting from a dict while iterating a live view of it fails
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1031
# global registry mapping namespace uri:s to their preferred prefixes;
# consulted by the serializer and updated by register_namespace
_namespace_map = dict([
    # "well-known" namespace prefixes
    ("http://www.w3.org/XML/1998/namespace", "xml"),
    ("http://www.w3.org/1999/xhtml", "html"),
    ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"),
    ("http://schemas.xmlsoap.org/wsdl/", "wsdl"),
    # xml schema
    ("http://www.w3.org/2001/XMLSchema", "xs"),
    ("http://www.w3.org/2001/XMLSchema-instance", "xsi"),
    # dublin core
    ("http://purl.org/dc/elements/1.1/", "dc"),
])
1044
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized.

    The message contains the repr of the offending object and the name
    of its type.  This function never returns.
    """
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
1049
def _encode(text, encoding):
    """Encode *text*, writing unencodable characters as character references.

    Objects that cannot be encoded (TypeError or AttributeError) are
    reported through _raise_serialization_error.
    """
    try:
        encoded = text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
    else:
        return encoded
1055
def _escape_cdata(text, encoding):
    """Escape *text* for use as character data and encode it.

    Replaces "&", "<" and ">" with their entities ("&" first, so the
    entities themselves are not escaped again), then encodes with
    character references for unencodable characters.  Objects that
    cannot be processed are reported through _raise_serialization_error.
    """
    try:
        # the membership test avoids do-nothing replace calls, which
        # is worthwhile for the short strings that dominate most
        # applications
        for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            if char in text:
                text = text.replace(char, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1071
def _escape_attrib(text, encoding):
    """Escape *text* for use as an XML attribute value and encode it.

    Replaces "&", "<", ">", the double quote and the newline character
    with entities or character references ("&" first), then encodes
    with character references for unencodable characters.  Objects that
    cannot be processed are reported through _raise_serialization_error.
    """
    replacements = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
        ("\n", "&#10;"),
    )
    try:
        for char, entity in replacements:
            if char in text:
                text = text.replace(char, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1088
def _escape_attrib_html(text, encoding):
    """Escape *text* for use as an HTML attribute value and encode it.

    Replaces "&", ">" and the double quote with entities ("&" first);
    "<" and newlines are left as they are.  The result is encoded with
    character references for unencodable characters.  Objects that
    cannot be processed are reported through _raise_serialization_error.
    """
    try:
        for char, entity in (("&", "&amp;"), (">", "&gt;"), ("\"", "&quot;")):
            if char in text:
                text = text.replace(char, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1101
1102# --------------------------------------------------------------------
1103
1104##
1105# Generates a string representation of an XML element, including all
1106# subelements.
1107#
1108# @param element An Element instance.
1109# @keyparam encoding Optional output encoding (default is US-ASCII).
1110# @keyparam method Optional output method ("xml", "html", "text" or
1111#     "c14n"; default is "xml").
1112# @return An encoded string containing the XML data.
1113# @defreturn string
1114
def tostring(element, encoding=None, method=None):
    """Serialize *element* and its subtree to a single string.

    The element is wrapped in an ElementTree and written to an
    in-memory sink; the collected fragments are joined and returned.
    """
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    # the tree writer only needs an object with a write() method
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)
1123
1124##
1125# Generates a string representation of an XML element, including all
1126# subelements.  The string is returned as a sequence of string fragments.
1127#
1128# @param element An Element instance.
1129# @keyparam encoding Optional output encoding (default is US-ASCII).
1130# @keyparam method Optional output method ("xml", "html", "text" or
1131#     "c14n"; default is "xml").
1132# @return A sequence object containing the XML data.
1133# @defreturn sequence
1134# @since 1.3
1135
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and its subtree to a list of string fragments.

    The element is wrapped in an ElementTree and written to an
    in-memory sink; the fragments are returned exactly as written.
    """
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    # the tree writer only needs an object with a write() method
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1145
1146##
1147# Writes an element tree or element structure to sys.stdout.  This
1148# function should be used for debugging only.
1149# <p>
1150# The exact output format is implementation dependent.  In this
1151# version, it's written as an ordinary XML file.
1152#
1153# @param elem An element tree or an individual element.
1154
def dump(elem):
    """Write *elem* (a tree or a single element) to sys.stdout.

    Intended for debugging.  A newline is added after the output unless
    the root element's tail already ends with one.
    """
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        tree = ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    if tail and tail[-1] == "\n":
        return
    sys.stdout.write("\n")
1163
1164# --------------------------------------------------------------------
1165# parsing
1166
1167##
1168# Parses an XML document into an element tree.
1169#
1170# @param source A filename or file object containing XML data.
1171# @param parser An optional parser instance.  If not given, the
1172#     standard {@link XMLParser} parser is used.
1173# @return An ElementTree instance
1174
def parse(source, parser=None):
    """Parse *source* into a new ElementTree and return the tree.

    Both arguments are handed unchanged to ElementTree.parse().
    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1179
1180##
1181# Parses an XML document into an element tree incrementally, and reports
1182# what's going on to the user.
1183#
1184# @param source A filename or file object containing XML data.
1185# @param events A list of events to report back.  If omitted, only "end"
1186#     events are reported.
1187# @param parser An optional parser instance.  If not given, the
1188#     standard {@link XMLParser} parser is used.
1189# @return A (event, elem) iterator.
1190
def iterparse(source, events=None, parser=None):
    """Return an iterator that parses *source* incrementally.

    *source* is used as is when it has a read attribute; otherwise it
    is treated as a filename and opened in binary mode.  When no parser
    is given, an XMLParser feeding a fresh TreeBuilder is created.
    """
    stream = source
    if not hasattr(stream, "read"):
        # NOTE(review): nothing visible here closes a file opened on
        # the caller's behalf
        stream = open(stream, "rb")
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    return _IterParseIterator(stream, events, parser)
1197
class _IterParseIterator(object):
    # Iterator returned by iterparse.  It reads the source in chunks,
    # feeds them to the parser, and hands out the (event, value) pairs
    # that the handlers installed in __init__ have queued up.
    #
    # The "root" attribute is None until iteration is exhausted; it is
    # then set to the value returned by the parser's close() method.

    def __init__(self, source, events, parser):
        # source: object with a read() method
        # events: sequence of event names ("start", "end", "start-ns",
        #     "end-ns"), or None for just "end"
        # parser: parser object exposing the underlying expat parser as
        #     _parser, plus _start/_start_list/_end callbacks
        self._file = source
        self._events = [] # queue of pending (event, value) pairs
        self._index = 0 # position of the next pair to hand out
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        # each handler binds event/append (and the wrapped callback) as
        # default arguments, so it keeps the values of this iteration
        for event in events:
            if event == "start":
                try:
                    # prefer list-style attributes; an AttributeError
                    # raised anywhere in this block (including a missing
                    # _start_list) selects the fallback handler below
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    # convert the uri to ascii if possible, otherwise
                    # report it unchanged
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    # the prefix is not reported for this event
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        # Return the next queued (event, value) pair, reading and
        # parsing more data whenever the queue runs dry.  Raises
        # StopIteration once the parser has been closed and the queue
        # is empty.
        while 1:
            try:
                item = self._events[self._index]
            except IndexError:
                if self._parser is None:
                    # parser already closed: publish the root and stop
                    self.root = self._root
                    raise StopIteration
                # load event buffer
                del self._events[:]
                self._index = 0
                data = self._file.read(16384)
                if data:
                    self._parser.feed(data)
                else:
                    # end of input; closing the parser may still queue
                    # events, so loop once more before stopping
                    self._root = self._parser.close()
                    self._parser = None
            else:
                self._index = self._index + 1
                return item

    def __iter__(self):
        return self
1268
1269##
1270# Parses an XML document from a string constant.  This function can
1271# be used to embed "XML literals" in Python code.
1272#
1273# @param source A string containing XML data.
1274# @param parser An optional parser instance.  If not given, the
1275#     standard {@link XMLParser} parser is used.
1276# @return An Element instance.
1277# @defreturn Element
1278
def XML(text, parser=None):
    """Parse the XML document in *text* and return the parser's result.

    When no parser is given, an XMLParser feeding a fresh TreeBuilder
    is used.  The whole text is fed in one call and the parser is then
    closed.
    """
    builder = parser
    if not builder:
        builder = XMLParser(target=TreeBuilder())
    builder.feed(text)
    return builder.close()
1284
1285##
1286# Parses an XML document from a string constant, and also returns
1287# a dictionary which maps from element id:s to elements.
1288#
1289# @param source A string containing XML data.
1290# @param parser An optional parser instance.  If not given, the
1291#     standard {@link XMLParser} parser is used.
1292# @return A tuple containing an Element instance and a dictionary.
1293# @defreturn (Element, dictionary)
1294
def XMLID(text, parser=None):
    """Parse *text* and also index its elements by their "id" attribute.

    Returns a (tree, ids) pair, where ids maps each non-empty "id"
    attribute value to its element; when a value occurs more than once,
    the element visited last wins.
    """
    builder = parser
    if not builder:
        builder = XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    by_id = {}
    for node in root.iter():
        ident = node.get("id")
        if ident:
            by_id[ident] = node
    return root, by_id
1306
1307##
1308# Parses an XML document from a string constant.  Same as {@link #XML}.
1309#
1310# @def fromstring(text)
1311# @param source A string containing XML data.
1312# @return An Element instance.
1313# @defreturn Element
1314
# plain alias: fromstring is the very same function object as XML
fromstring = XML
1316
1317##
1318# Parses an XML document from a sequence of string fragments.
1319#
1320# @param sequence A list or other sequence containing XML data fragments.
1321# @param parser An optional parser instance.  If not given, the
1322#     standard {@link XMLParser} parser is used.
1323# @return An Element instance.
1324# @defreturn Element
1325# @since 1.3
1326
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of string fragments.

    Each fragment is fed to the parser in order; the parser is then
    closed and its result returned.  When no parser is given, an
    XMLParser feeding a fresh TreeBuilder is used.
    """
    builder = parser
    if not builder:
        builder = XMLParser(target=TreeBuilder())
    for fragment in sequence:
        builder.feed(fragment)
    return builder.close()
1333
1334# --------------------------------------------------------------------
1335
1336##
1337# Generic element structure builder.  This builder converts a sequence
1338# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1339# #TreeBuilder.end} method calls to a well-formed element structure.
1340# <p>
1341# You can use this class to build an element structure using a custom XML
1342# parser, or a parser for some other XML-like format.
1343#
1344# @param element_factory Optional element factory.  This factory
1345#    is called to create new Element instances, as necessary.
1346
class TreeBuilder(object):
    """Build an element structure from start/data/end calls.

    Elements are created by calling the element factory with the tag
    and the attribute dictionary.  Text passed to data() is buffered and
    attached to the most recent element, as its text or its tail, the
    next time an element is opened or closed.
    """

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        self._tail = None # true if we're after an end tag

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        still_open = len(self._elem)
        assert still_open == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # attach the buffered text to the last element: as its tail
        # if an end tag was seen last, as its text otherwise
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        stack = self._elem
        if stack:
            # nest the new element under the innermost open element
            stack[-1].append(elem)
        stack.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1423
1424##
1425# Element structure builder for XML source data, based on the
1426# <b>expat</b> parser.
1427#
1428# @keyparam target Target object.  If omitted, the builder uses an
1429#     instance of the standard {@link #TreeBuilder} class.
1430# @keyparam html Predefine HTML entities.  This flag is not supported
1431#     by the current implementation.
1432# @keyparam encoding Optional encoding.  If given, the value overrides
1433#     the encoding specified in the XML file.
1434# @see #ElementTree
1435# @see #TreeBuilder
1436
class XMLParser(object):
    """Expat-based parser that reports what it reads to a target object.

    The target receives start(tag, attrib), end(tag) and data(text)
    calls, plus comment(), pi() and doctype() calls when it defines
    those methods; close() returns whatever the target's close()
    returns.  Names and text are converted to ASCII strings whenever
    that is possible.
    """

    def __init__(self, html=0, target=None, encoding=None):
        # NOTE: html is accepted for compatibility only; it is not used
        # anywhere in this method
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: qualified names then arrive
        # as "uri}local", which _fixname turns into "{uri}local"
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None # list of doctype tokens while inside one
        self.entity = {} # entity name -> replacement text, see _default
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _raiseerror(self, value):
        # re-raise an expat error as a ParseError that carries the
        # original error code and a (line, column) position
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start-tag callback for parsers that deliver the attributes
        # as a dictionary
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start-tag callback for parsers that deliver the attributes
        # as a flat [name, value, name, value, ...] list
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _comment(self, data):
        # comments are forwarded only if the target wants them
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    def _pi(self, target, data):
        # processing instructions are forwarded only if the target
        # wants them
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    def _default(self, text):
        # Receives everything no other handler takes.  Used for two
        # things: resolving entities through self.entity, and
        # collecting the tokens of a doctype declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # use the error class cached by __init__, so that this
                # also works when expat was imported as pyexpat
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                # the identifiers are reported without their
                # surrounding quote characters
                if pubid:
                    pubid = pubid[1:-1]
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # doctype() was overridden in a subclass.  Bound
                    # methods must be compared with != here: every
                    # attribute access creates a new bound method
                    # object, so an identity test is always true.
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
    # @exception ParseError If the parser reports an error.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error:
            # sys.exc_info() is used instead of binding the exception
            # in the except clause; this spelling is valid on every
            # Python version
            self._raiseerror(sys.exc_info()[1])

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element
    # @exception ParseError If the parser reports an error.

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error:
            self._raiseerror(sys.exc_info()[1])
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1640
1641# compatibility
# alias for XMLParser: both names refer to the same class object
XMLTreeBuilder = XMLParser
1643
1644# workaround circular import.
# register the optional "c14n" output method if the ElementC14N module
# can be imported; without it the serializer table simply has no
# "c14n" entry
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass
1650