14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# ElementTree
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# light-weight XML support for Python 2.3 and later.
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# history (since 1.2.6):
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2005-11-12 fl   added tostringlist/fromstringlist helpers
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2006-07-05 fl   removed support for 2.1 and earlier
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-06-21 fl   added deprecation/future warnings
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-08-25 fl   added doctype hook, added parser version attribute etc
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-08-27 fl   warn for broken /tag searches on tree level
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-09-02 fl   added html/text methods to serializer (experimental)
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-09-05 fl   added method argument to tostring/tostringlist
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-09-06 fl   improved error handling
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# 2007-12-15 fl   added C14N hooks, copy method (experimental)
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# fredrik@pythonware.com
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# http://www.pythonware.com
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The ElementTree toolkit is
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Copyright (c) 1999-2008 by Fredrik Lundh
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# By obtaining, using, and/or copying this software and/or its
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# associated documentation, you agree that you have read, understood,
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# and will comply with the following terms and conditions:
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Permission to use, copy, modify, and distribute this software and
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# its associated documentation for any purpose and without fee is
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# hereby granted, provided that the above copyright notice appears in
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# all copies, and that both that copyright notice and this permission
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# notice appear in supporting documentation, and that the name of
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Secret Labs AB or the author not be used in advertising or publicity
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# pertaining to distribution of the software without specific, written
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# prior permission.
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# OF THIS SOFTWARE.
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Licensed to PSF under a Contributor Agreement.
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# See http://www.python.org/psf/license for licensing details.
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# Names exported by "from <module> import *"; one public symbol per line.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser",
    "XMLTreeBuilder",
]

# Version string, exported as part of the public API (listed in __all__).
VERSION = "1.3.0"
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The <b>Element</b> type is a flexible container object, designed to
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# store hierarchical data structures in memory. The type can be
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# described as a cross between a list and a dictionary.
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Each element has a number of properties associated with it:
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <ul>
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <li>a <i>tag</i>. This is a string identifying what kind of data
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# this element represents (the element type, in other words).</li>
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <li>a <i>text</i> string.</li>
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <li>an optional <i>tail</i> string.</li>
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# </ul>
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# To create an element instance, use the {@link #Element} constructor
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# or the {@link #SubElement} factory function.
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The {@link #ElementTree} class can be used to wrap an element
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# structure, and convert it from and to XML.
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport re
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport warnings
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class _SimpleElementPath(object):
    """Minimal stand-in used when the ElementPath module cannot be imported.

    Emulates the pre-1.2 find/findtext/findall behaviour: a tag is matched
    by plain equality against direct children, and iterfind/findall
    additionally understand the ".//tag" form.  The namespaces argument is
    accepted for interface compatibility only; it is never consulted.
    """

    def find(self, element, tag, namespaces=None):
        """Return the first direct child whose tag equals tag, or None."""
        for child in element:
            if child.tag == tag:
                return child
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        """Return the text of the first direct child whose tag equals tag.

        Returns default when no child matches, and an empty string when
        the matching child has no (or empty) text.
        """
        match = self.find(element, tag, namespaces)
        if match is None:
            return default
        return match.text or ""

    def iterfind(self, element, tag, namespaces=None):
        """Yield matching elements.

        A tag of the form ".//name" yields everything produced by
        element.iter("name"); any other tag yields the direct children
        whose tag equals it.
        """
        if tag[:3] == ".//":
            for descendant in element.iter(tag[3:]):
                yield descendant
            # Stop here: without this, the loop below would go on to
            # compare the raw ".//name" string against the children's tags.
            return
        for child in element:
            if child.tag == tag:
                yield child

    def findall(self, element, tag, namespaces=None):
        """Return the results of iterfind() as a list."""
        return list(self.iterfind(element, tag, namespaces))

try:
    from . import ElementPath
except ImportError:
    # The ElementPath module is unavailable; fall back to the simple
    # emulation defined above.
    ElementPath = _SimpleElementPath()
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parser error.  This is a subclass of <b>SyntaxError</b>.
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# In addition to the exception value, an exception instance contains a
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# specific exception code in the <b>code</b> attribute, and the line and
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# column of the error in the <b>position</b> attribute.
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class ParseError(SyntaxError):
    """Parser error; a subclass of SyntaxError.

    The module documentation describes "code" and "position" attributes
    on instances; this class does not set them itself, so they are
    presumably attached by whatever raises the error.
    """
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Checks if an object appears to be a valid element object.
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
# @param element An element instance.
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return A true value if this is an element object.
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn flag
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def iselement(element):
    """Return True if the object appears to be a valid element object.

    An object qualifies if it is an Element instance or simply has a
    "tag" attribute.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Element class.  This class defines the Element interface, and
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# provides a reference implementation of this interface.
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The element name, attribute names, and attribute values can be
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# either ASCII strings (ordinary Python strings containing only 7-bit
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# ASCII characters) or Unicode strings.
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param tag The element name.
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param attrib An optional dictionary, containing element attributes.
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param **extra Additional attributes, given as keyword arguments.
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see Element
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see SubElement
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see Comment
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see ProcessingInstruction
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Element(object):
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # <tag attrib>text<child/>...</tag>tail
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Attribute) Element tag.
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    tag = None
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Attribute) Element attribute dictionary.  Where possible, use
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # {@link #Element.get},
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # {@link #Element.set},
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # {@link #Element.keys}, and
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # {@link #Element.items} to access
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # element attributes.
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    attrib = None
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Attribute) Text before first subelement.  This is either a
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # string or the value None.  Note that if there was no text, this
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # attribute may be either None or an empty string, depending on
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # the parser.
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    text = None
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Attribute) Text after this element's end tag, but before the
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # next sibling element's start tag.  This is either a string or
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # the value None.  Note that if there was no text, this attribute
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # may be either None or an empty string, depending on the parser.
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    tail = None # text after end tag, if any
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # constructor
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, tag, attrib={}, **extra):
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attrib = attrib.copy()
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attrib.update(extra)
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.tag = tag
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.attrib = attrib
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children = []
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Creates a new element object of the same type as this element.
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param tag Element tag.
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param attrib Element attributes, given as a dictionary.
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A new element instance.
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def makeelement(self, tag, attrib):
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.__class__(tag, attrib)
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Experimental) Copies the current element.  This creates a
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # shallow copy; subelements will be shared with the original tree.
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A new element instance.
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def copy(self):
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elem = self.makeelement(self.tag, self.attrib)
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elem.text = self.text
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elem.tail = self.tail
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elem[:] = self
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return elem
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Returns the number of subelements.  Note that this only counts
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # full elements; to check if there's any content in an element, you
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # have to check both the length and the <b>text</b> attribute.
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The number of subelements.
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __len__(self):
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return len(self._children)
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __nonzero__(self):
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn(
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "The behavior of this method will change in future versions.  "
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "Use specific 'len(elem)' or 'elem is not None' test instead.",
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            FutureWarning, stacklevel=2
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            )
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return len(self._children) != 0 # emulate old behaviour, for now
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Returns the given subelement, by index.
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param index What subelement to return.
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The given subelement.
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @exception IndexError If the given element does not exist.
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __getitem__(self, index):
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._children[index]
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Replaces the given subelement, by index.
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param index What subelement to replace.
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param element The new element value.
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @exception IndexError If the given element does not exist.
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __setitem__(self, index, element):
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # if isinstance(index, slice):
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #     for elt in element:
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #         assert iselement(elt)
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # else:
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #     assert iselement(element)
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children[index] = element
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Deletes the given subelement, by index.
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param index What subelement to delete.
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @exception IndexError If the given element does not exist.
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __delitem__(self, index):
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        del self._children[index]
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Adds a subelement to the end of this element.  In document order,
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # the new element will appear after the last existing subelement (or
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # directly after the text, if it's the first subelement), but before
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # the end tag for this element.
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param element The element to add.
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def append(self, element):
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert iselement(element)
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children.append(element)
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Appends subelements from a sequence.
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param elements A sequence object with zero or more elements.
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @since 1.3
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def extend(self, elements):
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # for element in elements:
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #     assert iselement(element)
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children.extend(elements)
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Inserts a subelement at the given position in this element.
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param index Where to insert the new subelement.
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def insert(self, index, element):
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert iselement(element)
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children.insert(index, element)
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Removes a matching subelement.  Unlike the <b>find</b> methods,
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # this method compares elements based on identity, not on tag
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # value or contents.  To remove subelements by other means, the
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # easiest way is often to use a list comprehension to select what
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # elements to keep, and use slice assignment to update the parent
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # element.
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param element What element to remove.
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @exception ValueError If a matching element could not be found.
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def remove(self, element):
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert iselement(element)
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children.remove(element)
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Deprecated) Returns all subelements.  The elements are returned
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # in document order.
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A list of subelements.
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn list of Element instances
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def getchildren(self):
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn(
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "This method will be removed in future versions.  "
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "Use 'list(elem)' or iteration over elem instead.",
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            DeprecationWarning, stacklevel=2
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            )
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._children
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds the first matching subelement, by tag name or path.
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The first matching element, or None if no element was found.
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn Element or None
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def find(self, path, namespaces=None):
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return ElementPath.find(self, path, namespaces)
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds text for the first matching subelement, by tag name or path.
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param default What to return if the element was not found.
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the element
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     is found, but has no text content, this method returns an
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     empty string.
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn string
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def findtext(self, path, default=None, namespaces=None):
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return ElementPath.findtext(self, path, default, namespaces)
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds all matching subelements, by tag name or path.
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A list or other sequence containing all matching elements,
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #    in document order.
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn list of Element instances
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def findall(self, path, namespaces=None):
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return ElementPath.findall(self, path, namespaces)
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds all matching subelements, by tag name or path.
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An iterator or sequence containing all matching elements,
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #    in document order.
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn a generated sequence of Element instances
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def iterfind(self, path, namespaces=None):
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return ElementPath.iterfind(self, path, namespaces)
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Resets an element.  This function removes all subelements, clears
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # to None.
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clear(self):
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.attrib.clear()
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._children = []
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.text = self.tail = None
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # some implementations may handle this a bit more efficiently.
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param key What attribute to look for.
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param default What to return if the attribute was not found.
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The attribute value, or the default value, if the
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     attribute was not found.
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn string or None
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get(self, key, default=None):
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.attrib.get(key, default)
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # but some implementations may handle this a bit more efficiently.
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param key What attribute to set.
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param value The attribute value.
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set(self, key, value):
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.attrib[key] = value
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Gets a list of attribute names.  The names are returned in an
4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # arbitrary order (just like for an ordinary Python dictionary).
4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Equivalent to <b>attrib.keys()</b>.
4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A list of element attribute names.
4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn list of strings
4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def keys(self):
4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.attrib.keys()
4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Gets element attributes, as a sequence.  The attributes are
4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A list of (name, value) tuples for all attributes.
4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn list of (string, string) tuples
4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def items(self):
4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.attrib.items()
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Creates a tree iterator.  The iterator loops over this element
4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # and all subelements, in document order, and returns all elements
4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # with a matching tag.
4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # <p>
4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # If the tree structure is modified during iteration, new or removed
4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # elements may or may not be included.  To get a stable set, use the
4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # list() function on the iterator, and loop over the resulting list.
4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param tag What tags to look for (default is to return all elements).
4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An iterator containing all the matching elements.
4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn iterator
4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def iter(self, tag=None):
4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if tag == "*":
4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            tag = None
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if tag is None or self.tag == tag:
4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield self
4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for e in self._children:
4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for e in e.iter(tag):
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield e
4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # compatibility
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def getiterator(self, tag=None):
4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Change for a DeprecationWarning in 1.4
4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn(
4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "This method will be removed in future versions.  "
4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            PendingDeprecationWarning, stacklevel=2
4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return list(self.iter(tag))
4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Creates a text iterator.  The iterator loops over this element
4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # and all subelements, in document order, and returns all inner
4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # text.
4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An iterator containing all inner text.
4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn iterator
4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def itertext(self):
4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tag = self.tag
5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not isinstance(tag, basestring) and tag is not None:
5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.text:
5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield self.text
5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for e in self:
5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for s in e.itertext():
5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield s
5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if e.tail:
5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield e.tail
5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# compatibility: both names are plain aliases of the Element class
_Element = Element
_ElementInterface = Element
5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Subelement factory.  This function creates an element instance, and
5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# appends it to an existing element.
5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The element name, attribute names, and attribute values can be
5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# either 8-bit ASCII strings or Unicode strings.
5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param parent The parent element.
5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param tag The subelement name.
5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param attrib An optional dictionary, containing element attributes.
5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param **extra Additional attributes, given as keyword arguments.
5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An element instance.
5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn Element
5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def SubElement(parent, tag, attrib={}, **extra):
    # Work on a copy so that neither the caller's dictionary nor the
    # shared default argument is modified; keyword arguments override
    # entries from the dictionary.
    merged = attrib.copy()
    merged.update(extra)
    # The parent creates the new element, which is then attached as its
    # last child and returned.
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Comment element factory.  This factory function creates a special
5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# element that will be serialized as an XML comment by the standard
5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# serializer.
5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The comment string can be either an 8-bit ASCII string or a Unicode
5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# string.
5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param text A string containing the comment string.
5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An element instance, representing a comment.
5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn Element
5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def Comment(text=None):
    # The factory function itself is used as the element's tag; the
    # comment string is stored in the text attribute.
    comment = Element(Comment)
    comment.text = text
    return comment
5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# PI element factory.  This factory function creates a special element
5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# that will be serialized as an XML processing instruction by the standard
5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# serializer.
5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param target A string containing the PI target.
5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param text A string containing the PI contents, if any.
5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An element instance, representing a PI.
5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn Element
5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def ProcessingInstruction(target, text=None):
    # The stored text is the target, followed by a space and the
    # contents when non-empty contents are given.
    content = target
    if text:
        content = content + " " + text
    # The factory function itself is used as the element's tag.
    element = Element(ProcessingInstruction)
    element.text = content
    return element

# Short alias for the factory above.
PI = ProcessingInstruction
5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# QName wrapper.  This can be used to wrap a QName attribute value, in
5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# order to get proper namespace handling on output.
5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param text A string containing the QName value, in the form {uri}local,
5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     or, if the tag argument is given, the URI part of a QName.
5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param tag Optional tag.  If given, the first argument is interpreted as
5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     an URI, and this argument is interpreted as a local name.
5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An opaque object, representing the QName.
5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class QName(object):
    # Wrapper around a qualified name held in the "text" attribute.
    # Instances hash and compare by that text, and compare against
    # plain values as if they were the text itself.
    #
    # Comparison is implemented with the rich comparison methods so it
    # does not depend on __cmp__/cmp() being honoured by the
    # interpreter; __cmp__ is kept for callers that invoke it directly.

    def __init__(self, text_or_uri, tag=None):
        # With a tag, the first argument is the URI part and the two
        # are combined into the "{uri}local" form.
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # Must agree with __eq__: equal text gives an equal hash.
        return hash(self.text)

    def __eq__(self, other):
        if isinstance(other, QName):
            return self.text == other.text
        return self.text == other

    def __ne__(self, other):
        if isinstance(other, QName):
            return self.text != other.text
        return self.text != other

    def __lt__(self, other):
        if isinstance(other, QName):
            return self.text < other.text
        return self.text < other

    def __le__(self, other):
        if isinstance(other, QName):
            return self.text <= other.text
        return self.text <= other

    def __gt__(self, other):
        if isinstance(other, QName):
            return self.text > other.text
        return self.text > other

    def __ge__(self, other):
        if isinstance(other, QName):
            return self.text >= other.text
        return self.text >= other

    def __cmp__(self, other):
        # Three-way comparison (-1, 0 or 1) written without the cmp()
        # builtin.
        if isinstance(other, QName):
            other = other.text
        return (self.text > other) - (self.text < other)
5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# ElementTree wrapper class.  This class represents an entire element
5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# hierarchy, and adds some extra support for serialization to and from
5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# standard XML.
6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param element Optional root element.
6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam file Optional file handle or file name.  If given, the
6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     tree is initialized with the contents of this XML file.
6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass ElementTree(object):
6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, element=None, file=None):
6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert element is None or iselement(element)
6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._root = element # first node
6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if file:
6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.parse(file)
6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Gets the root element for this tree.
6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An element instance.
6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn Element
6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def getroot(self):
6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root
6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Replaces the root element for this tree.  This discards the
6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # current contents of the tree, and replaces it with the given
6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # element.  Use with care.
6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param element An element instance.
6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _setroot(self, element):
6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert iselement(element)
6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._root = element
6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Loads an external XML document into this element tree.
6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param source A file name or file object.  If a file object is
6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     given, it only has to implement a <b>read(n)</b> method.
6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam parser An optional parser instance.  If not given, the
6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     standard {@link XMLParser} parser is used.
6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The document root element.
6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn Element
6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @exception ParseError If the parser fails to parse the document.
6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def parse(self, source, parser=None):
6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not hasattr(source, "read"):
6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            source = open(source, "rb")
6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not parser:
6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            parser = XMLParser(target=TreeBuilder())
6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while 1:
6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            data = source.read(65536)
6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not data:
6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                break
6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            parser.feed(data)
6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._root = parser.close()
6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root
6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Creates a tree iterator for the root element.  The iterator loops
6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # over all elements in this tree, in document order.
6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param tag What tags to look for (default is to return all elements)
6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An iterator.
6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn iterator
6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def iter(self, tag=None):
6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert self._root is not None
6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root.iter(tag)
6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # compatibility
6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def getiterator(self, tag=None):
6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Change for a DeprecationWarning in 1.4
6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn(
6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "This method will be removed in future versions.  "
6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            PendingDeprecationWarning, stacklevel=2
6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return list(self.iter(tag))
6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds the first toplevel element with given tag.
6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Same as getroot().find(path).
6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The first matching element, or None if no element was found.
6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn Element or None
6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def find(self, path, namespaces=None):
6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert self._root is not None
6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if path[:1] == "/":
6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path = "." + path
6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            warnings.warn(
6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "This search is broken in 1.3 and earlier, and will be "
6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "fixed in a future version.  If you rely on the current "
6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "behaviour, change it to %r" % path,
6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                FutureWarning, stacklevel=2
6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                )
6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root.find(path, namespaces)
6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds the element text for the first toplevel element with given
7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # tag.  Same as getroot().findtext(path).
7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What toplevel element to look for.
7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param default What to return if the element was not found.
7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the element
7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     is found, but has no text content, this method returns an
7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     empty string.
7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn string
7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def findtext(self, path, default=None, namespaces=None):
7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert self._root is not None
7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if path[:1] == "/":
7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path = "." + path
7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            warnings.warn(
7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "This search is broken in 1.3 and earlier, and will be "
7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "fixed in a future version.  If you rely on the current "
7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "behaviour, change it to %r" % path,
7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                FutureWarning, stacklevel=2
7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                )
7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root.findtext(path, default, namespaces)
7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds all toplevel elements with the given tag.
7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Same as getroot().findall(path).
7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return A list or iterator containing all matching elements,
7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #    in document order.
7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn list of Element instances
7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def findall(self, path, namespaces=None):
7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert self._root is not None
7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if path[:1] == "/":
7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path = "." + path
7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            warnings.warn(
7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "This search is broken in 1.3 and earlier, and will be "
7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "fixed in a future version.  If you rely on the current "
7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "behaviour, change it to %r" % path,
7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                FutureWarning, stacklevel=2
7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                )
7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root.findall(path, namespaces)
7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finds all matching subelements, by tag name or path.
7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Same as getroot().iterfind(path).
7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param path What element to look for.
7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam namespaces Optional namespace prefix map.
7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An iterator or sequence containing all matching elements,
7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #    in document order.
7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn a generated sequence of Element instances
7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def iterfind(self, path, namespaces=None):
7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert self._root is not None
7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if path[:1] == "/":
7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path = "." + path
7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            warnings.warn(
7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "This search is broken in 1.3 and earlier, and will be "
7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "fixed in a future version.  If you rely on the current "
7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "behaviour, change it to %r" % path,
7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                FutureWarning, stacklevel=2
7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                )
7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._root.iterfind(path, namespaces)
7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Writes the element tree to a file, as XML.
7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @def write(file, **options)
7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param file A file name, or a file object opened for writing.
7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param **options Options, given as keyword arguments.
7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam encoding Optional output encoding (default is US-ASCII).
7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam method Optional output method ("xml", "html", "text" or
7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     "c14n"; default is "xml").
7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @keyparam xml_declaration Controls if an XML declaration should
7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     be added to the file.  Use False for never, True for always,
7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #     None for only if not US-ASCII or UTF-8.  None is default.
7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def write(self, file_or_filename,
7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm              # keyword arguments
7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm              encoding=None,
7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm              xml_declaration=None,
7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm              default_namespace=None,
7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm              method=None):
7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # assert self._root is not None
7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not method:
7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            method = "xml"
7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif method not in _serialize:
7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise ValueError("unknown method %r" % method)
7944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if hasattr(file_or_filename, "write"):
7954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            file = file_or_filename
7964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
7974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            file = open(file_or_filename, "wb")
7984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        write = file.write
7994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not encoding:
8004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if method == "c14n":
8014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                encoding = "utf-8"
8024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
8034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                encoding = "us-ascii"
8044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif xml_declaration or (xml_declaration is None and
8054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                 encoding not in ("utf-8", "us-ascii")):
8064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if method == "xml":
8074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
8084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if method == "text":
8094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _serialize_text(write, self._root, encoding)
8104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
8114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            qnames, namespaces = _namespaces(
8124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._root, encoding, default_namespace
8134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                )
8144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            serialize = _serialize[method]
8154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            serialize(write, self._root, encoding, qnames, namespaces)
8164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if file_or_filename is not file:
8174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            file.close()
8184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def write_c14n(self, file):
8204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # lxml.etree compatibility.  use output method instead
8214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.write(file, method="c14n")
8224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
8244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# serialization support
8254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _namespaces(elem, encoding, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Walks elem and all its descendants, looking at tags, attribute
    names, and any QName instances used as attribute values or text.

    Returns a (qnames, namespaces) pair: qnames maps each name to its
    encoded serialized form ("prefix:local" or just "local"), and
    namespaces maps each namespace uri to its prefix.

    Raises ValueError if default_namespace is given and a name without
    a namespace is found.  Names and tags of unsupported types are
    reported through _raise_serialization_error.
    """
    # name -> *encoded* serialized name; the None entry is there so
    # that a tag of None can be looked up like any other tag
    qnames = {None: None}

    # uri -> prefix
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # work out and store the serialized form of qname
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # fall back to the global registry, then to a generated
                # "nsN" prefix
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                # the "xml" prefix is never added to the table
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                serialized = "%s:%s" % (prefix, local)
            else:
                serialized = local # default element
            qnames[qname] = encode(serialized)
        except TypeError:
            _raise_serialization_error(qname)

    def note(qname):
        # register qname unless it has been seen already
        if qname not in qnames:
            add_qname(qname)

    # populate qname and namespaces table
    try:
        walk = elem.iter
    except AttributeError:
        walk = elem.getiterator # cET compatibility
    for node in walk():
        tag = node.tag
        if isinstance(tag, QName):
            note(tag.text)
        elif isinstance(tag, basestring):
            note(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            note(key)
            if isinstance(value, QName):
                note(value.text)
        text = node.text
        if isinstance(text, QName):
            note(text.text)
    return qnames, namespaces
8934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Write elem, its subtree and its tail as XML.

    write is called with each encoded fragment.  qnames maps tags and
    attribute names to their serialized form.  namespaces, when not
    empty, is written as xmlns declarations on this element; child
    elements are always serialized with namespaces set to None.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # no tag of its own: only the content is written
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], value))
            if not text and not len(elem):
                # nothing inside: use the short empty-element form
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail, encoding))
9404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# Tags for which the HTML serializer writes no end tag.  Every entry
# needs its own comma: adjacent string literals are silently joined
# into a single name.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# use a set for faster lookups where the builtin is available
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
9484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Write elem, its subtree and its tail as HTML.

    Differs from the XML serializer in that a start tag is always
    closed with ">", the text of script and style elements is encoded
    without escaping, attribute values go through _escape_attrib_html,
    and no end tag is written for tags listed in HTML_EMPTY.  The end
    tag, when written, uses the lowercased tag name.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # no tag of its own: only the content is written
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value, encoding)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + lowered + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail, encoding))
9984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _serialize_text(write, elem, encoding):
    """Write the text content of elem, then its tail, with no markup.

    Each string produced by elem.itertext() is encoded and passed to
    write separately; the tail is written last if it is not empty.
    """
    for fragment in elem.itertext():
        write(fragment.encode(encoding))
    tail = elem.tail
    if tail:
        write(tail.encode(encoding))
10044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# Output method name -> serializer function.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
)
10124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
10144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Registers a namespace prefix.  The registry is global, and any
10154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# existing mapping for either the given prefix or the namespace URI
10164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# will be removed.
10174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
10184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param prefix Namespace prefix.
10194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param uri Namespace uri.  Tags and attributes in this namespace
10204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     will be serialized with the given prefix, if at all possible.
10214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @exception ValueError If the prefix is reserved, or is otherwise
10224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     invalid.
10234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def register_namespace(prefix, uri):
    """Register prefix as the preferred prefix for the namespace uri.

    The registry is global.  Any existing entry that uses either the
    given prefix or the given uri is removed before the new mapping is
    stored.

    Raises ValueError if prefix has the form "ns" followed by digits,
    which is reserved for prefixes generated by the serializer.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot: entries are deleted from the live mapping
    # inside the loop, which is an error when items() is a view.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
10444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _raise_serialization_error(text):
    """Raise a TypeError naming the value that could not be serialized."""
    message = "cannot serialize %r (type %s)" % (text, type(text).__name__)
    raise TypeError(message)
10494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _encode(text, encoding):
    """Encode text without escaping markup characters.

    Characters that the encoding cannot represent are written as
    numeric character references.  A value that cannot be encoded this
    way is reported through _raise_serialization_error.
    """
    try:
        encoded = text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
    else:
        return encoded
10554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _escape_cdata(text, encoding):
    """Escape "&", "<" and ">" in character data and encode the result.

    Characters that the encoding cannot represent are written as
    numeric character references.  A value that cannot be processed is
    reported through _raise_serialization_error.
    """
    try:
        # it's worth avoiding do-nothing replace calls for strings that
        # are shorter than 500 characters, or so.  assume that's, by
        # far, the most common case in most applications.
        # "&" must come first, so that the entities added for the other
        # characters are not escaped again.
        for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            if raw in text:
                text = text.replace(raw, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
10714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _escape_attrib(text, encoding):
    """Escape an attribute value for XML output and encode the result.

    Replaces "&", "<", ">", the double quote and newline.  Characters
    that the encoding cannot represent are written as numeric character
    references.  A value that cannot be processed is reported through
    _raise_serialization_error.
    """
    replacements = (
        # "&" first, so entities added below are not escaped again
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
        ("\n", "&#10;"),
        )
    try:
        for raw, entity in replacements:
            if raw in text:
                text = text.replace(raw, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
10884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _escape_attrib_html(text, encoding):
    """Escape an attribute value for HTML output and encode the result.

    Replaces "&", ">" and the double quote; "<" and newline are left
    as they are.  Characters that the encoding cannot represent are
    written as numeric character references.  A value that cannot be
    processed is reported through _raise_serialization_error.
    """
    replacements = (
        # "&" first, so entities added below are not escaped again
        ("&", "&amp;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
        )
    try:
        for raw, entity in replacements:
            if raw in text:
                text = text.replace(raw, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
11014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
11034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
11054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Generates a string representation of an XML element, including all
11064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# subelements.
11074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
11084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param element An Element instance.
11094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam encoding Optional output encoding (default is US-ASCII).
11104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam method Optional output method ("xml", "html", "text" or
11114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     "c14n"; default is "xml").
11124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An encoded string containing the XML data.
11134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn string
11144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def tostring(element, encoding=None, method=None):
    """Serialize element and its subelements to a single string.

    The element is wrapped in an ElementTree and written to an
    in-memory object whose write method appends to a list; the
    collected fragments are then joined.
    """
    class _Sink:
        # minimal file-like object; "write" is attached per instance
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)
11234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
11254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Generates a string representation of an XML element, including all
11264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# subelements.  The string is returned as a sequence of string fragments.
11274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
11284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param element An Element instance.
11294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam encoding Optional output encoding (default is US-ASCII).
11304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam method Optional output method ("xml", "html", "text" or
11314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     "c14n"; default is "xml").
11324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return A sequence object containing the XML data.
11334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn sequence
11344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @since 1.3
11354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def tostringlist(element, encoding=None, method=None):
    """Serialize element and its subelements to a list of fragments.

    The element is wrapped in an ElementTree and written to an
    in-memory object whose write method appends to a list; that list
    is returned as is, one entry per write call.
    """
    class _Sink:
        # minimal file-like object; "write" is attached per instance
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
11454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
11474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Writes an element tree or element structure to sys.stdout.  This
11484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# function should be used for debugging only.
11494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
11504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# The exact output format is implementation dependent.  In this
11514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# version, it's written as an ordinary XML file.
11524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
11534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param elem An element tree or an individual element.
11544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def dump(elem):
    """Write an element tree or a single element to sys.stdout.

    Intended for debugging only.  A bare element is wrapped in an
    ElementTree first.  A newline is written afterwards unless the root
    element's tail already ends with one.
    """
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        tree = ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    if tail and tail[-1] == "\n":
        # output already ends with a line break
        return
    sys.stdout.write("\n")
11634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
11654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# parsing
11664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
11684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parses an XML document into an element tree.
11694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
11704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param source A filename or file object containing XML data.
11714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param parser An optional parser instance.  If not given, the
11724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     standard {@link XMLParser} parser is used.
11734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An ElementTree instance
11744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def parse(source, parser=None):
    """Parse *source* into a new ElementTree and return that tree.

    *source* and *parser* are passed straight through to the parse()
    method of a freshly created, empty ElementTree.
    """
    document = ElementTree()
    document.parse(source, parser)
    return document
11794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
11814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parses an XML document into an element tree incrementally, and reports
11824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# what's going on to the user.
11834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
11844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param source A filename or file object containing XML data.
11854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param events A list of events to report back.  If omitted, only "end"
11864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     events are reported.
11874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param parser An optional parser instance.  If not given, the
11884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     standard {@link XMLParser} parser is used.
11894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return A (event, elem) iterator.
11904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef iterparse(source, events=None, parser=None):
11924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not hasattr(source, "read"):
11934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        source = open(source, "rb")
11944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not parser:
11954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        parser = XMLParser(target=TreeBuilder())
11964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return _IterParseIterator(source, events, parser)
11974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass _IterParseIterator(object):
11994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, source, events, parser):
12014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._file = source
12024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._events = []
12034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._index = 0
12044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.root = self._root = None
12054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._parser = parser
12064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # wire up the parser for event reporting
12074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        parser = self._parser._parser
12084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        append = self._events.append
12094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if events is None:
12104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            events = ["end"]
12114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for event in events:
12124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if event == "start":
12134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                try:
12144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    parser.ordered_attributes = 1
12154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    parser.specified_attributes = 1
12164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    def handler(tag, attrib_in, event=event, append=append,
12174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                start=self._parser._start_list):
12184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        append((event, start(tag, attrib_in)))
12194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    parser.StartElementHandler = handler
12204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                except AttributeError:
12214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    def handler(tag, attrib_in, event=event, append=append,
12224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                start=self._parser._start):
12234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        append((event, start(tag, attrib_in)))
12244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    parser.StartElementHandler = handler
12254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif event == "end":
12264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                def handler(tag, event=event, append=append,
12274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                            end=self._parser._end):
12284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    append((event, end(tag)))
12294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                parser.EndElementHandler = handler
12304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif event == "start-ns":
12314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                def handler(prefix, uri, event=event, append=append):
12324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    try:
12334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        uri = (uri or "").encode("ascii")
12344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    except UnicodeError:
12354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        pass
12364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    append((event, (prefix or "", uri or "")))
12374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                parser.StartNamespaceDeclHandler = handler
12384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif event == "end-ns":
12394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                def handler(prefix, event=event, append=append):
12404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    append((event, None))
12414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                parser.EndNamespaceDeclHandler = handler
12424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
12434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise ValueError("unknown event %r" % event)
12444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def next(self):
12464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while 1:
12474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
12484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                item = self._events[self._index]
12494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except IndexError:
12504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if self._parser is None:
12514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.root = self._root
12524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    raise StopIteration
12534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # load event buffer
12544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                del self._events[:]
12554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._index = 0
12564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                data = self._file.read(16384)
12574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if data:
12584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._parser.feed(data)
12594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                else:
12604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._root = self._parser.close()
12614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._parser = None
12624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
12634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._index = self._index + 1
12644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return item
12654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __iter__(self):
12674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self
12684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
12704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parses an XML document from a string constant.  This function can
12714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# be used to embed "XML literals" in Python code.
12724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
# @param text A string containing XML data.
12744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param parser An optional parser instance.  If not given, the
12754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     standard {@link XMLParser} parser is used.
12764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An Element instance.
12774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn Element
12784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def XML(text, parser=None):
    """Parse an XML document held in a string and return its root.

    *text* is fed to *parser* in a single call; the result of the
    parser's close() is returned.  When no parser (or a false value) is
    given, an XMLParser feeding a TreeBuilder is created.
    """
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    return root
12844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
12864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parses an XML document from a string constant, and also returns
12874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# a dictionary which maps from element id:s to elements.
12884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
# @param text A string containing XML data.
12904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param parser An optional parser instance.  If not given, the
12914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     standard {@link XMLParser} parser is used.
12924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return A tuple containing an Element instance and a dictionary.
12934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn (Element, dictionary)
12944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def XMLID(text, parser=None):
    """Parse an XML string; return (root, {id: element}).

    Every element yielded by the root's iter() is inspected for an "id"
    attribute.  Elements with a missing or empty id are left out of the
    dictionary; if an id occurs more than once, the element visited last
    wins.
    """
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        if not key:
            continue
        by_id[key] = node
    return root, by_id
13064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
13084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parses an XML document from a string constant.  Same as {@link #XML}.
13094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
# @def fromstring(text, parser=None)
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
13124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An Element instance.
13134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn Element
13144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# plain alias: fromstring is the very same function object as XML, so it
# accepts the same (text, parser=None) arguments
fromstring = XML
13164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
13184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Parses an XML document from a sequence of string fragments.
13194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
13204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param sequence A list or other sequence containing XML data fragments.
13214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param parser An optional parser instance.  If not given, the
13224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     standard {@link XMLParser} parser is used.
13234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @return An Element instance.
13244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @defreturn Element
13254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @since 1.3
13264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of string fragments.

    The fragments are fed to *parser* one by one, in order, and the
    result of the parser's close() is returned.  When no parser (or a
    false value) is given, an XMLParser feeding a TreeBuilder is created.
    """
    parser = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        parser.feed(fragment)
    root = parser.close()
    return root
13334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
13354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
13374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Generic element structure builder.  This builder converts a sequence
13384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
13394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# #TreeBuilder.end} method calls to a well-formed element structure.
13404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <p>
13414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# You can use this class to build an element structure using a custom XML
13424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# parser, or a parser for some other XML-like format.
13434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
13444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @param element_factory Optional element factory.  This factory
13454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#    is called to create new Element instances, as necessary.
13464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class TreeBuilder(object):
    """Build an element structure from start(), data() and end() calls.

    element_factory -- optional callable invoked as factory(tag, attrs)
        to create each element.  Defaults to Element.
    """

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = []    # text fragments not yet attached to an element
        self._elem = []    # stack of currently open elements
        self._last = None  # element most recently opened or closed
        self._tail = None  # true if we're after an end tag

    def close(self):
        """Return the toplevel element.

        Asserts that every opened element has been closed and that at
        least one element was seen.
        """
        assert not self._elem, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach collected text to the last element: as its tail when an
        # end tag was seen last, as its text otherwise.  Text collected
        # before any element exists is dropped.
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    def data(self, data):
        """Collect a piece of text for the current position."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open a new element and return it.

        The element is created with factory(tag, attrs), appended to the
        currently open element (if there is one) and pushed on the stack.
        """
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        stack = self._elem
        if stack:
            stack[-1].append(elem)
        stack.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close the innermost open element and return it.

        Asserts that *tag* matches the tag of the element being closed.
        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
14234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm##
14254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Element structure builder for XML source data, based on the
14264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# <b>expat</b> parser.
14274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
14284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam target Target object.  If omitted, the builder uses an
14294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     instance of the standard {@link #TreeBuilder} class.
14304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam html Predefine HTML entities.  This flag is not supported
14314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     by the current implementation.
14324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @keyparam encoding Optional encoding.  If given, the value overrides
14334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#     the encoding specified in the XML file.
14344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see #ElementTree
14354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see #TreeBuilder
14364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass XMLParser(object):
14384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
    def __init__(self, html=0, target=None, encoding=None):
        """Create an expat parser and route its callbacks to *target*.

        html -- accepted but not used by this method.
        target -- object receiving the start/end/data (and optionally
            comment/pi) calls; a new TreeBuilder is used when None.
        encoding -- passed to expat.ParserCreate as the encoding.

        Raises ImportError if neither xml.parsers.expat nor pyexpat can
        be imported.
        """
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: qualified names arrive as
        # "uri}local", which _fixname completes to "{uri}local"
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        # (replaces the _start handler installed above with _start_list,
        # which takes attributes as a flat name/value list)
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        # table consulted by _default to resolve otherwise undefined
        # entities; starts out empty
        self.entity = {}
        # self.version is left unset when expat has no version_info
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown
14834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _raiseerror(self, value):
14854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        err = ParseError(value)
14864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        err.code = value.code
14874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        err.position = value.lineno, value.offset
14884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise err
14894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _fixtext(self, text):
14914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # convert text string to ascii, if possible
14924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
14934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return text.encode("ascii")
14944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except UnicodeError:
14954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return text
14964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _fixname(self, key):
14984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # expand qname, and convert name string to ascii, if possible
14994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
15004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            name = self._names[key]
15014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except KeyError:
15024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            name = key
15034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if "}" in name:
15044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                name = "{" + name
15054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._names[key] = name = self._fixtext(name)
15064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return name
15074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _start(self, tag, attrib_in):
15094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fixname = self._fixname
15104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fixtext = self._fixtext
15114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tag = fixname(tag)
15124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attrib = {}
15134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for key, value in attrib_in.items():
15144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            attrib[fixname(key)] = fixtext(value)
15154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.target.start(tag, attrib)
15164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _start_list(self, tag, attrib_in):
15184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fixname = self._fixname
15194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fixtext = self._fixtext
15204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tag = fixname(tag)
15214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attrib = {}
15224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if attrib_in:
15234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for i in range(0, len(attrib_in), 2):
15244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
15254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.target.start(tag, attrib)
15264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _data(self, text):
15284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.target.data(self._fixtext(text))
15294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _end(self, tag):
15314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.target.end(self._fixname(tag))
15324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _comment(self, data):
15344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
15354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            comment = self.target.comment
15364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except AttributeError:
15374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pass
15384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
15394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return comment(self._fixtext(data))
15404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _pi(self, target, data):
15424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
15434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pi = self.target.pi
15444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except AttributeError:
15454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pass
15464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
15474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return pi(self._fixtext(target), self._fixtext(data))
15484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _default(self, text):
15504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        prefix = text[:1]
15514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if prefix == "&":
15524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # deal with undefined entities
15534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
15544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.target.data(self.entity[text[1:-1]])
15554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except KeyError:
15564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                from xml.parsers import expat
15574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                err = expat.error(
15584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    "undefined entity %s: line %d, column %d" %
15594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    (text, self._parser.ErrorLineNumber,
15604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._parser.ErrorColumnNumber)
15614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    )
15624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
15634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                err.lineno = self._parser.ErrorLineNumber
15644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                err.offset = self._parser.ErrorColumnNumber
15654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise err
15664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif prefix == "<" and text[:9] == "<!DOCTYPE":
15674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._doctype = [] # inside a doctype declaration
15684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif self._doctype is not None:
15694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # parse doctype contents
15704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if prefix == ">":
15714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._doctype = None
15724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return
15734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            text = text.strip()
15744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not text:
15754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return
15764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._doctype.append(text)
15774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            n = len(self._doctype)
15784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if n > 2:
15794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                type = self._doctype[1]
15804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if type == "PUBLIC" and n == 4:
15814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    name, type, pubid, system = self._doctype
15824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                elif type == "SYSTEM" and n == 3:
15834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    name, type, system = self._doctype
15844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    pubid = None
15854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                else:
15864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return
15874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if pubid:
15884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    pubid = pubid[1:-1]
15894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if hasattr(self.target, "doctype"):
15904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.target.doctype(name, pubid, system[1:-1])
15914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                elif self.doctype is not self._XMLParser__doctype:
15924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # warn about deprecated call
15934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._XMLParser__doctype(name, pubid, system[1:-1])
15944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.doctype(name, pubid, system[1:-1])
15954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._doctype = None
15964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
15984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (Deprecated) Handles a doctype declaration.
15994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
16004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param name Doctype name.
16014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param pubid Public identifier.
16024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param system System identifier.
16034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def doctype(self, name, pubid, system):
16054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """This method of XMLParser is deprecated."""
16064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn(
16074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "This method of XMLParser is deprecated.  Define doctype() "
16084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "method on the TreeBuilder target.",
16094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            DeprecationWarning,
16104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            )
16114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
    # sentinel, if doctype is redefined in a subclass: a private
    # (name-mangled) reference to the base-class doctype(), which
    # _default compares against self.doctype to detect an override
    __doctype = doctype
16144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
16164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Feeds data to the parser.
16174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
16184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @param data Encoded data.
16194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def feed(self, data):
16214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
16224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._parser.Parse(data, 0)
16234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except self._error, v:
16244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._raiseerror(v)
16254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ##
16274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Finishes feeding data to the parser.
16284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #
16294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @return An element structure.
16304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # @defreturn Element
16314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def close(self):
16334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
16344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._parser.Parse("", 1) # end of data
16354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except self._error, v:
16364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._raiseerror(v)
16374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tree = self.target.close()
16384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        del self.target, self._parser # get rid of circular references
16394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return tree
16404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# compatibility: keep the old class name available as an alias
XMLTreeBuilder = XMLParser

# workaround circular import: the C14N serializer is hooked in here, at
# the end of the module, and only when its module can be imported.
try:
    from ElementC14N import _serialize_c14n
except ImportError:
    pass
else:
    _serialize["c14n"] = _serialize_c14n
1650