10a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
20a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# ElementTree
30a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
40a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
50a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# light-weight XML support for Python 2.3 and later.
60a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
70a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# history (since 1.2.6):
80a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2005-11-12 fl   added tostringlist/fromstringlist helpers
90a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2006-07-05 fl   removed support for 2.1 and earlier
110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-06-21 fl   added deprecation/future warnings
120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-08-25 fl   added doctype hook, added parser version attribute etc
130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-08-27 fl   warn for broken /tag searches on tree level
150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-09-02 fl   added html/text methods to serializer (experimental)
160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-09-05 fl   added method argument to tostring/tostringlist
170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-09-06 fl   improved error handling
180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 2007-12-15 fl   added C14N hooks, copy method (experimental)
200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# fredrik@pythonware.com
240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# http://www.pythonware.com
250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The ElementTree toolkit is
280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Copyright (c) 1999-2008 by Fredrik Lundh
300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# By obtaining, using, and/or copying this software and/or its
320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# associated documentation, you agree that you have read, understood,
330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# and will comply with the following terms and conditions:
340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Permission to use, copy, modify, and distribute this software and
360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# its associated documentation for any purpose and without fee is
370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# hereby granted, provided that the above copyright notice appears in
380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# all copies, and that both that copyright notice and this permission
390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# notice appear in supporting documentation, and that the name of
400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Secret Labs AB or the author not be used in advertising or publicity
410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# pertaining to distribution of the software without specific, written
420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# prior permission.
430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# OF THIS SOFTWARE.
520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Licensed to PSF under a Contributor Agreement.
550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# See http://www.python.org/psf/license for licensing details.
560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Names exported by "from <module> import *".
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser",
    "XMLTreeBuilder",
]

# Version string of this ElementTree implementation.
VERSION = "1.3.0"
760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The <b>Element</b> type is a flexible container object, designed to
790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# store hierarchical data structures in memory. The type can be
800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# described as a cross between a list and a dictionary.
810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Each element has a number of properties associated with it:
830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <ul>
840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <li>a <i>tag</i>. This is a string identifying what kind of data
850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# this element represents (the element type, in other words).</li>
860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <li>a <i>text</i> string.</li>
880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <li>an optional <i>tail</i> string.</li>
890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# </ul>
910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# To create an element instance, use the {@link #Element} constructor
930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# or the {@link #SubElement} factory function.
940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The {@link #ElementTree} class can be used to wrap an element
960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# structure, and convert it from and to XML.
970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
990a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport sys
1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport re
1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport warnings
1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _SimpleElementPath(object):
1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # emulate pre-1.2 find/findtext/findall behaviour
1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def find(self, element, tag, namespaces=None):
1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for elem in element:
1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if elem.tag == tag:
1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return elem
1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return None
1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def findtext(self, element, tag, default=None, namespaces=None):
1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elem = self.find(element, tag)
1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if elem is None:
1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return default
1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return elem.text or ""
1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def iterfind(self, element, tag, namespaces=None):
1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tag[:3] == ".//":
1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for elem in element.iter(tag[3:]):
1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                yield elem
1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for elem in element:
1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if elem.tag == tag:
1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                yield elem
1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def findall(self, element, tag, namespaces=None):
1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return list(self.iterfind(element, tag, namespaces))
1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry:
1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    from . import ElementPath
1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept ImportError:
1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ElementPath = _SimpleElementPath()
1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parser error.  This is a subclass of <b>SyntaxError</b>.
1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# In addition to the exception value, an exception instance contains a
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# specific exception code in the <b>code</b> attribute, and the line and
1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# column of the error in the <b>position</b> attribute.
1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class ParseError(SyntaxError):
    """Parser error; a subclass of SyntaxError.

    Besides the exception value, instances are expected to carry the
    specific error code in a "code" attribute and the line and column
    of the error in a "position" attribute.  This class does not set
    those attributes itself.
    """
1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Checks if an object appears to be a valid element object.
1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
# @param element An element instance.
1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return A true value if this is an element object.
1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn flag
1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def iselement(element):
    """Return a true value if element looks like an element object.

    An object qualifies when it is an Element instance or, failing
    that, when it has a "tag" attribute.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Element class.  This class defines the Element interface, and
1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# provides a reference implementation of this interface.
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The element name, attribute names, and attribute values can be
1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# either ASCII strings (ordinary Python strings containing only 7-bit
1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# ASCII characters) or Unicode strings.
1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param tag The element name.
1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param attrib An optional dictionary, containing element attributes.
1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param **extra Additional attributes, given as keyword arguments.
1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @see Element
1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @see SubElement
1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @see Comment
1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @see ProcessingInstruction
1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass Element(object):
1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # <tag attrib>text<child/>...</tag>tail
1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Attribute) Element tag.
1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    tag = None
1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Attribute) Element attribute dictionary.  Where possible, use
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # {@link #Element.get},
1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # {@link #Element.set},
1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # {@link #Element.keys}, and
1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # {@link #Element.items} to access
1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # element attributes.
1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    attrib = None
1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Attribute) Text before first subelement.  This is either a
1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # string or the value None.  Note that if there was no text, this
1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # attribute may be either None or an empty string, depending on
1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # the parser.
1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    text = None
1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Attribute) Text after this element's end tag, but before the
1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # next sibling element's start tag.  This is either a string or
2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # the value None.  Note that if there was no text, this attribute
2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # may be either None or an empty string, depending on the parser.
2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    tail = None # text after end tag, if any
2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # constructor
2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, tag, attrib={}, **extra):
2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        attrib = attrib.copy()
2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        attrib.update(extra)
2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.tag = tag
2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.attrib = attrib
2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children = []
2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __repr__(self):
2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Creates a new element object of the same type as this element.
2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param tag Element tag.
2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param attrib Element attributes, given as a dictionary.
2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A new element instance.
2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makeelement(self, tag, attrib):
2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.__class__(tag, attrib)
2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Experimental) Copies the current element.  This creates a
2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # shallow copy; subelements will be shared with the original tree.
2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A new element instance.
2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def copy(self):
2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elem = self.makeelement(self.tag, self.attrib)
2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elem.text = self.text
2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elem.tail = self.tail
2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elem[:] = self
2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return elem
2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Returns the number of subelements.  Note that this only counts
2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # full elements; to check if there's any content in an element, you
2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # have to check both the length and the <b>text</b> attribute.
2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The number of subelements.
2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __len__(self):
2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return len(self._children)
2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __nonzero__(self):
2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        warnings.warn(
2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "The behavior of this method will change in future versions.  "
2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "Use specific 'len(elem)' or 'elem is not None' test instead.",
2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            FutureWarning, stacklevel=2
2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return len(self._children) != 0 # emulate old behaviour, for now
2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Returns the given subelement, by index.
2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param index What subelement to return.
2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The given subelement.
2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @exception IndexError If the given element does not exist.
2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __getitem__(self, index):
2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._children[index]
2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Replaces the given subelement, by index.
2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param index What subelement to replace.
2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param element The new element value.
2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @exception IndexError If the given element does not exist.
2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __setitem__(self, index, element):
2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # if isinstance(index, slice):
2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        #     for elt in element:
2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        #         assert iselement(elt)
2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # else:
2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        #     assert iselement(element)
2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children[index] = element
2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Deletes the given subelement, by index.
2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param index What subelement to delete.
2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @exception IndexError If the given element does not exist.
2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __delitem__(self, index):
2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        del self._children[index]
2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Adds a subelement to the end of this element.  In document order,
2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # the new element will appear after the last existing subelement (or
2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # directly after the text, if it's the first subelement), but before
2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # the end tag for this element.
2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param element The element to add.
2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def append(self, element):
3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert iselement(element)
3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children.append(element)
3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Appends subelements from a sequence.
3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param elements A sequence object with zero or more elements.
3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @since 1.3
3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def extend(self, elements):
3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # for element in elements:
3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        #     assert iselement(element)
3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children.extend(elements)
3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Inserts a subelement at the given position in this element.
3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param index Where to insert the new subelement.
3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def insert(self, index, element):
3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert iselement(element)
3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children.insert(index, element)
3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Removes a matching subelement.  Unlike the <b>find</b> methods,
3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # this method compares elements based on identity, not on tag
3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # value or contents.  To remove subelements by other means, the
3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # easiest way is often to use a list comprehension to select what
3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # elements to keep, and use slice assignment to update the parent
3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # element.
3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param element What element to remove.
3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @exception ValueError If a matching element could not be found.
3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def remove(self, element):
3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert iselement(element)
3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children.remove(element)
3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Deprecated) Returns all subelements.  The elements are returned
3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # in document order.
3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A list of subelements.
3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn list of Element instances
3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getchildren(self):
3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        warnings.warn(
3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "This method will be removed in future versions.  "
3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "Use 'list(elem)' or iteration over elem instead.",
3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            DeprecationWarning, stacklevel=2
3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._children
3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Finds the first matching subelement, by tag name or path.
3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The first matching element, or None if no element was found.
3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn Element or None
3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def find(self, path, namespaces=None):
3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return ElementPath.find(self, path, namespaces)
3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Finds text for the first matching subelement, by tag name or path.
3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param default What to return if the element was not found.
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the element
3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     is found, but has no text content, this method returns an
3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     empty string.
3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn string
3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def findtext(self, path, default=None, namespaces=None):
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return ElementPath.findtext(self, path, default, namespaces)
3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Finds all matching subelements, by tag name or path.
3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A list or other sequence containing all matching elements,
3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #    in document order.
3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn list of Element instances
3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def findall(self, path, namespaces=None):
3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return ElementPath.findall(self, path, namespaces)
3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Finds all matching subelements, by tag name or path.
3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An iterator or sequence containing all matching elements,
3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #    in document order.
3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn a generated sequence of Element instances
4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def iterfind(self, path, namespaces=None):
4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return ElementPath.iterfind(self, path, namespaces)
4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Resets an element.  This function removes all subelements, clears
4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # to None.
4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def clear(self):
4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.attrib.clear()
4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._children = []
4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.text = self.tail = None
4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # some implementations may handle this a bit more efficiently.
4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param key What attribute to look for.
4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param default What to return if the attribute was not found.
4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The attribute value, or the default value, if the
4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     attribute was not found.
4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn string or None
4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def get(self, key, default=None):
4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.attrib.get(key, default)
4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # but some implementations may handle this a bit more efficiently.
4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param key What attribute to set.
4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param value The attribute value.
4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def set(self, key, value):
4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.attrib[key] = value
4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Gets a list of attribute names.  The names are returned in an
4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # arbitrary order (just like for an ordinary Python dictionary).
4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Equivalent to <b>attrib.keys()</b>.
4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A list of element attribute names.
4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn list of strings
4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def keys(self):
4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.attrib.keys()
4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Gets element attributes, as a sequence.  The attributes are
4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A list of (name, value) tuples for all attributes.
4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn list of (string, string) tuples
4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def items(self):
4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.attrib.items()
4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Creates a tree iterator.  The iterator loops over this element
4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # and all subelements, in document order, and returns all elements
4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # with a matching tag.
4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # <p>
4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # If the tree structure is modified during iteration, new or removed
4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # elements may or may not be included.  To get a stable set, use the
4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # list() function on the iterator, and loop over the resulting list.
4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param tag What tags to look for (default is to return all elements).
4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An iterator containing all the matching elements.
4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn iterator
4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def iter(self, tag=None):
4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tag == "*":
4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tag = None
4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tag is None or self.tag == tag:
4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            yield self
4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for e in self._children:
4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for e in e.iter(tag):
4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                yield e
4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # compatibility
4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getiterator(self, tag=None):
4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Change for a DeprecationWarning in 1.4
4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        warnings.warn(
4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "This method will be removed in future versions.  "
4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            PendingDeprecationWarning, stacklevel=2
4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return list(self.iter(tag))
4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Creates a text iterator.  The iterator loops over this element
4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # and all subelements, in document order, and returns all inner
4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # text.
4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An iterator containing all inner text.
4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn iterator
4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def itertext(self):
4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tag = self.tag
5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not isinstance(tag, basestring) and tag is not None:
5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.text:
5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            yield self.text
5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for e in self:
5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for s in e.itertext():
5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                yield s
5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if e.tail:
5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                yield e.tail
5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# compatibility: _Element and _ElementInterface are kept as additional
# names bound to the very same Element class.
_Element = _ElementInterface = Element
5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Subelement factory.  This function creates an element instance, and
5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# appends it to an existing element.
5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The element name, attribute names, and attribute values can be
5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# either 8-bit ASCII strings or Unicode strings.
5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parent The parent element.
5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param tag The subelement name.
5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param attrib An optional dictionary, containing element attributes.
5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param **extra Additional attributes, given as keyword arguments.
5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An element instance.
5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn Element
5260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def SubElement(parent, tag, attrib={}, **extra):
    # Work on a copy, so that neither the caller's dictionary nor the
    # shared default argument is ever modified.
    merged = attrib.copy()
    merged.update(extra)
    # The parent decides how the new element gets created, and then
    # receives it as its last child.
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
5330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
5350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Comment element factory.  This factory function creates a special
5360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# element that will be serialized as an XML comment by the standard
5370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# serializer.
5380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
5390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The comment string can be either an 8-bit ASCII string or a Unicode
5400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# string.
5410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
5420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param text A string containing the comment string.
5430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An element instance, representing a comment.
5440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn Element
5450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def Comment(text=None):
    # The factory function itself serves as the tag of the new element;
    # the comment string is stored as the element's text.
    comment = Element(Comment)
    comment.text = text
    return comment
5500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
5520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# PI element factory.  This factory function creates a special element
5530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# that will be serialized as an XML processing instruction by the standard
5540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# serializer.
5550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
5560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param target A string containing the PI target.
5570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param text A string containing the PI contents, if any.
5580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An element instance, representing a PI.
5590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn Element
5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def ProcessingInstruction(target, text=None):
    # The factory function itself serves as the tag of the new element.
    instruction = Element(ProcessingInstruction)
    # The element text is the target, followed by a single space and
    # the contents when non-empty contents are given.
    if text:
        instruction.text = target + " " + text
    else:
        instruction.text = target
    return instruction
5670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Short alias: PI is the same function object as ProcessingInstruction.
PI = ProcessingInstruction
5690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
5710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# QName wrapper.  This can be used to wrap a QName attribute value, in
5720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# order to get proper namespace handling on output.
5730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
5740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param text A string containing the QName value, in the form {uri}local,
5750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     or, if the tag argument is given, the URI part of a QName.
5760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param tag Optional tag.  If given, the first argument is interpreted as
5770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     an URI, and this argument is interpreted as a local name.
5780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An opaque object, representing the QName.
5790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class QName(object):
    def __init__(self, text_or_uri, tag=None):
        # With a (non-empty) tag, the first argument is the URI part and
        # the two are combined into the {uri}local form; otherwise the
        # first argument is stored as given.
        if tag:
            self.text = "{%s}%s" % (text_or_uri, tag)
        else:
            self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # Hashes like the wrapped text does.
        return hash(self.text)

    def __cmp__(self, other):
        # Another QName is compared by its text; anything else is
        # compared against this QName's text directly.
        if isinstance(other, QName):
            other = other.text
        return cmp(self.text, other)
5930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
5950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
5970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# ElementTree wrapper class.  This class represents an entire element
5980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# hierarchy, and adds some extra support for serialization to and from
5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# standard XML.
6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
6010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param element Optional root element.
6020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @keyparam file Optional file handle or file name.  If given, the
6030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     tree is initialized with the contents of this XML file.
6040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6050a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ElementTree(object):
6060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, element=None, file=None):
6080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert element is None or iselement(element)
6090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._root = element # first node
6100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if file:
6110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.parse(file)
6120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
6140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Gets the root element for this tree.
6150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
6160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An element instance.
6170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn Element
6180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getroot(self):
6200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._root
6210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
6230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Replaces the root element for this tree.  This discards the
6240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # current contents of the tree, and replaces it with the given
6250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # element.  Use with care.
6260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
6270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param element An element instance.
6280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    def _setroot(self, element):
        # Rebinds the tree's root to the given element.  No validation is
        # performed here; the type check below is disabled.
        # assert iselement(element)
        self._root = element
6320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
6340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Loads an external XML document into this element tree.
6350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
6360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param source A file name or file object.  If a file object is
6370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     given, it only has to implement a <b>read(n)</b> method.
6380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam parser An optional parser instance.  If not given, the
6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     standard {@link XMLParser} parser is used.
6400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The document root element.
6410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn Element
6420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @exception ParseError If the parser fails to parse the document.
6430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def parse(self, source, parser=None):
6450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        close_source = False
6460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not hasattr(source, "read"):
6470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            source = open(source, "rb")
6480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            close_source = True
6490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
6500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not parser:
6510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parser = XMLParser(target=TreeBuilder())
6520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            while 1:
6530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                data = source.read(65536)
6540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if not data:
6550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    break
6560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parser.feed(data)
6570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._root = parser.close()
6580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self._root
6590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        finally:
6600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if close_source:
6610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                source.close()
6620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
6640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Creates a tree iterator for the root element.  The iterator loops
6650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # over all elements in this tree, in document order.
6660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
6670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param tag What tags to look for (default is to return all elements)
6680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An iterator.
6690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn iterator
6700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def iter(self, tag=None):
6720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert self._root is not None
6730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._root.iter(tag)
6740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # compatibility
6760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getiterator(self, tag=None):
6770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Change for a DeprecationWarning in 1.4
6780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        warnings.warn(
6790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "This method will be removed in future versions.  "
6800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
6810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            PendingDeprecationWarning, stacklevel=2
6820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
6830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return list(self.iter(tag))
6840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
6860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Same as getroot().find(path), starting at the root of the
6870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # tree.
6880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
6890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
6900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
6910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The first matching element, or None if no element was found.
6920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn Element or None
6930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def find(self, path, namespaces=None):
6950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert self._root is not None
6960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if path[:1] == "/":
6970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            path = "." + path
6980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            warnings.warn(
6990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "This search is broken in 1.3 and earlier, and will be "
7000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "fixed in a future version.  If you rely on the current "
7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "behaviour, change it to %r" % path,
7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                FutureWarning, stacklevel=2
7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._root.find(path, namespaces)
7050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
7070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Same as getroot().findtext(path), starting at the root of the tree.
7080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
7090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
7100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param default What to return if the element was not found.
7110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
7120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the element
7140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     is found, but has no text content, this method returns an
7150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #     empty string.
7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn string
7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def findtext(self, path, default=None, namespaces=None):
7190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert self._root is not None
7200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if path[:1] == "/":
7210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            path = "." + path
7220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            warnings.warn(
7230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "This search is broken in 1.3 and earlier, and will be "
7240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "fixed in a future version.  If you rely on the current "
7250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "behaviour, change it to %r" % path,
7260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                FutureWarning, stacklevel=2
7270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
7280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._root.findtext(path, default, namespaces)
7290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
7310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Same as getroot().findall(path), starting at the root of the tree.
7320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
7330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
7340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
7350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return A list or iterator containing all matching elements,
7360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #    in document order.
7370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn list of Element instances
7380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def findall(self, path, namespaces=None):
7400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert self._root is not None
7410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if path[:1] == "/":
7420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            path = "." + path
7430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            warnings.warn(
7440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "This search is broken in 1.3 and earlier, and will be "
7450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "fixed in a future version.  If you rely on the current "
7460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "behaviour, change it to %r" % path,
7470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                FutureWarning, stacklevel=2
7480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
7490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._root.findall(path, namespaces)
7500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
7520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Finds all matching subelements, by tag name or path.
7530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Same as getroot().iterfind(path).
7540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
7550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param path What element to look for.
7560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @keyparam namespaces Optional namespace prefix map.
7570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An iterator or sequence containing all matching elements,
7580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #    in document order.
7590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn a generated sequence of Element instances
7600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def iterfind(self, path, namespaces=None):
7620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert self._root is not None
7630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if path[:1] == "/":
7640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            path = "." + path
7650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            warnings.warn(
7660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "This search is broken in 1.3 and earlier, and will be "
7670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "fixed in a future version.  If you rely on the current "
7680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "behaviour, change it to %r" % path,
7690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                FutureWarning, stacklevel=2
7700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
7710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._root.iterfind(path, namespaces)
7720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    ##
    # Writes the element tree to a file, as XML.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding (default is US-ASCII).
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file.  Use False for never, True for always,
    #     None for only if not US-ASCII or UTF-8.  None is default.
    # @keyparam default_namespace Sets the default XML namespace (for "xmlns").
    # @keyparam method Optional output method ("xml", "html", "text" or
    #     "c14n"; default is "xml").
7860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def write(self, file_or_filename,
7880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao              # keyword arguments
7890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao              encoding=None,
7900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao              xml_declaration=None,
7910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao              default_namespace=None,
7920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao              method=None):
7930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # assert self._root is not None
7940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not method:
7950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            method = "xml"
7960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif method not in _serialize:
7970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
7980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("unknown method %r" % method)
7990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(file_or_filename, "write"):
8000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            file = file_or_filename
8010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
8020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            file = open(file_or_filename, "wb")
8030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        write = file.write
8040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not encoding:
8050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if method == "c14n":
8060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                encoding = "utf-8"
8070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
8080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                encoding = "us-ascii"
8090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif xml_declaration or (xml_declaration is None and
8100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                 encoding not in ("utf-8", "us-ascii")):
8110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if method == "xml":
8120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
8130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if method == "text":
8140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            _serialize_text(write, self._root, encoding)
8150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
8160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            qnames, namespaces = _namespaces(
8170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._root, encoding, default_namespace
8180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
8190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            serialize = _serialize[method]
8200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            serialize(write, self._root, encoding, qnames, namespaces)
8210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if file_or_filename is not file:
8220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            file.close()
8230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
8240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def write_c14n(self, file):
8250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # lxml.etree compatibility.  use output method instead
8260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.write(file, method="c14n")
8270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# --------------------------------------------------------------------
# serialization support
8300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _namespaces(elem, encoding, default_namespace=None):
    # identify namespaces used in this tree
    #
    # returns a (qnames, namespaces) tuple.  qnames maps every tag,
    # attribute name and QName-valued attribute/text found in the tree
    # to its encoded serialized name; namespaces maps each namespace
    # uri that was assigned a prefix (other than "xml") to that prefix.

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # not seen in this tree yet: use the registered
                    # prefix if there is one, else generate "nsN"
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
8980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    # serialize an element, its subtree and its tail as XML.  qnames is
    # the table built by _namespaces; namespaces is only passed for the
    # outermost element (children get None), so the xmlns declarations
    # are written once, on that element.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag: write the content only
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_xml(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for uri, prefix in sorted(namespaces.items(),
                                              key=lambda x: x[1]):  # sort on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v, encoding)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for e in elem:
                    _serialize_xml(write, e, encoding, qnames, None)
                write("</" + tag + ">")
            else:
                # no text and no children: self-closing form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
9450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# tag names for which the HTML serializer writes no end tag
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# use a set for the membership tests where the builtin is available;
# otherwise keep the tuple
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
9530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # serialize an element, its subtree and its tail as HTML.  differs
    # from the XML serializer in that start tags are never self-closed,
    # the text of script/style elements is written unescaped, and
    # elements listed in HTML_EMPTY get no end tag.  namespaces is only
    # passed for the outermost element (children get None).
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag: write the content only
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for uri, prefix in sorted(namespaces.items(),
                                              key=lambda x: x[1]):  # sort on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            # compare case-insensitively, but keep the original spelling
            # so that the end tag matches the start tag written above
            ltag = tag.lower()
            if text:
                if ltag == "script" or ltag == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
10030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _serialize_text(write, elem, encoding):
    # write the encoded text content of the element and its subtree
    # (as produced by itertext), followed by the element's own tail
    for part in elem.itertext():
        write(part.encode(encoding))
    if elem.tail:
        write(elem.tail.encode(encoding))
10090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# maps output method names to serializer functions.  note that the
# "text" serializer takes (write, elem, encoding) only, unlike the
# others which take (write, elem, encoding, qnames, namespaces).
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
10170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
##
# Registers a namespace prefix.  The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
#
# @param prefix Namespace prefix.
# @param uri Namespace uri.  Tags and attributes in this namespace
#     will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix is reserved, or is otherwise
#     invalid.
10280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def register_namespace(prefix, uri):
    # prefixes of the form "ns<digits>" are generated by the serializer
    # for unregistered namespaces, so they cannot be registered by hand
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # drop any existing mapping for this uri or this prefix.  iterate
    # over a snapshot, since entries are deleted along the way.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
10490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _raise_serialization_error(text):
    # common error for values the serializer cannot handle; reports
    # both the value and its type name
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
        )
10540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _encode(text, encoding):
    # encode text without escaping markup; characters the encoding
    # cannot represent become numeric character references
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
10600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _escape_cdata(text, encoding):
    # escape character data: replaces &, < and > with entities, then
    # encodes (unrepresentable characters become character references)
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            # must come first, so the entities added below are kept
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
10760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _escape_attrib(text, encoding):
    # escape attribute value: replaces &, <, >, the double quote and
    # newline, then encodes (unrepresentable characters become
    # character references)
    try:
        if "&" in text:
            # must come first, so the entities added below are kept
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
10930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def _escape_attrib_html(text, encoding):
    # escape attribute value, html style: replaces &, > and the double
    # quote only ("<" and newlines are left alone), then encodes
    try:
        if "&" in text:
            # must come first, so the entities added below are kept
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
11060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
11080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
##
# Generates a string representation of an XML element, including all
# subelements.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return An encoded string containing the XML data.
# @defreturn string
11190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def tostring(element, encoding=None, method=None):
    # serialize into a list of fragments, then join them into a
    # single string
    return "".join(tostringlist(element, encoding, method))
11280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
##
# Generates a string representation of an XML element, including all
# subelements.  The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3
11400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and return the output as a list of fragments.

    The element is wrapped in an ElementTree and written to a minimal
    file-like object whose write() appends each fragment to the list.
    """
    fragments = []

    class _Sink:
        pass

    # The only thing the sink offers is a write() that collects fragments.
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
11500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
11520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Writes an element tree or element structure to sys.stdout.  This
11530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# function should be used for debugging only.
11540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
11550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The exact output format is implementation dependent.  In this
11560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# version, it's written as an ordinary XML file.
11570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
11580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param elem An element tree or an individual element.
11590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def dump(elem):
    """Write *elem* (an ElementTree or a single element) to sys.stdout.

    A bare element is wrapped in an ElementTree first.  A newline is
    written afterwards unless the root element's tail already ends in one.
    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    trailing = tree.getroot().tail
    if not (trailing and trailing[-1] == "\n"):
        sys.stdout.write("\n")
11680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
11700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# parsing
11710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
11730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parses an XML document into an element tree.
11740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
11750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param source A filename or file object containing XML data.
11760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parser An optional parser instance.  If not given, the
11770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     standard {@link XMLParser} parser is used.
11780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An ElementTree instance
11790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def parse(source, parser=None):
    """Create an empty ElementTree, load *source* into it, and return it.

    *source* and *parser* are handed unchanged to ElementTree.parse().
    """
    document = ElementTree()
    document.parse(source, parser)
    return document
11840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
11860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parses an XML document into an element tree incrementally, and reports
11870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# what's going on to the user.
11880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
11890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param source A filename or file object containing XML data.
11900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param events A list of events to report back.  If omitted, only "end"
11910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     events are reported.
11920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parser An optional parser instance.  If not given, the
11930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     standard {@link XMLParser} parser is used.
11940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return A (event, elem) iterator.
11950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def iterparse(source, events=None, parser=None):
    """Return an iterator that parses *source* incrementally.

    source: a filename or an object with a read() method.  Anything
        without a read attribute is opened here in binary mode, and the
        resulting file is then owned (and eventually closed) by the
        returned iterator.
    events: passed through to _IterParseIterator; None is replaced there
        by ["end"].
    parser: optional parser instance; a falsy value means a new
        XMLParser(target=TreeBuilder()) is created.

    Raises whatever open(), the parser constructor, or the
    _IterParseIterator constructor raises (the latter raises ValueError
    for an unknown event name).
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except:
        # Setup failed after we opened the file ourselves: nobody else
        # holds a reference to it, so close it here instead of leaking
        # the handle, then let the original exception propagate.
        if close_source:
            source.close()
        raise
12040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12050a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _IterParseIterator(object):
12060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, source, events, parser, close_source=False):
12080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._file = source
12090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._close_file = close_source
12100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._events = []
12110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._index = 0
12120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._error = None
12130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.root = self._root = None
12140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._parser = parser
12150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # wire up the parser for event reporting
12160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parser = self._parser._parser
12170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        append = self._events.append
12180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if events is None:
12190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            events = ["end"]
12200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for event in events:
12210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if event == "start":
12220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
12230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    parser.ordered_attributes = 1
12240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    parser.specified_attributes = 1
12250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    def handler(tag, attrib_in, event=event, append=append,
12260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                start=self._parser._start_list):
12270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        append((event, start(tag, attrib_in)))
12280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    parser.StartElementHandler = handler
12290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except AttributeError:
12300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    def handler(tag, attrib_in, event=event, append=append,
12310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                start=self._parser._start):
12320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        append((event, start(tag, attrib_in)))
12330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    parser.StartElementHandler = handler
12340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif event == "end":
12350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                def handler(tag, event=event, append=append,
12360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            end=self._parser._end):
12370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    append((event, end(tag)))
12380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parser.EndElementHandler = handler
12390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif event == "start-ns":
12400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                def handler(prefix, uri, event=event, append=append):
12410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    try:
12420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        uri = (uri or "").encode("ascii")
12430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    except UnicodeError:
12440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        pass
12450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    append((event, (prefix or "", uri or "")))
12460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parser.StartNamespaceDeclHandler = handler
12470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif event == "end-ns":
12480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                def handler(prefix, event=event, append=append):
12490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    append((event, None))
12500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                parser.EndNamespaceDeclHandler = handler
12510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
12520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ValueError("unknown event %r" % event)
12530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def next(self):
12550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        while 1:
12560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
12570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                item = self._events[self._index]
12580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._index += 1
12590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return item
12600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except IndexError:
12610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pass
12620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self._error:
12630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                e = self._error
12640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._error = None
12650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise e
12660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self._parser is None:
12670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.root = self._root
12680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self._close_file:
12690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._file.close()
12700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise StopIteration
12710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # load event buffer
12720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            del self._events[:]
12730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._index = 0
12740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            data = self._file.read(16384)
12750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if data:
12760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
12770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._parser.feed(data)
12780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except SyntaxError as exc:
12790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._error = exc
12800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
12810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._root = self._parser.close()
12820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._parser = None
12830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __iter__(self):
12850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self
12860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
12880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parses an XML document from a string constant.  This function can
12890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# be used to embed "XML literals" in Python code.
12900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
# @param text A string containing XML data.
12920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parser An optional parser instance.  If not given, the
12930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     standard {@link XMLParser} parser is used.
12940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An Element instance.
12950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn Element
12960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def XML(text, parser=None):
    """Feed *text* to a parser and return what the parser's close() returns.

    A falsy *parser* is replaced by XMLParser(target=TreeBuilder()).
    """
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    return builder.close()
13020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
13040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parses an XML document from a string constant, and also returns
13050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# a dictionary which maps from element id:s to elements.
13060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
# @param text A string containing XML data.
13080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parser An optional parser instance.  If not given, the
13090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     standard {@link XMLParser} parser is used.
13100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return A tuple containing an Element instance and a dictionary.
13110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn (Element, dictionary)
13120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def XMLID(text, parser=None):
    """Parse *text* and return (root, ids).

    *ids* maps every non-empty "id" attribute value found while walking
    root.iter() to the element carrying it; when a value occurs more than
    once, the element visited last wins.  A falsy *parser* is replaced by
    XMLParser(target=TreeBuilder()).
    """
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        if not key:
            # missing or empty id attribute: nothing to index
            continue
        by_id[key] = node
    return root, by_id
13240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
13260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parses an XML document from a string constant.  Same as {@link #XML}.
13270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
13280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @def fromstring(text)
# @param text A string containing XML data.
13300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An Element instance.
13310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn Element
13320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# fromstring is bound to the very same function object as XML
# (a plain alias, not a wrapper).
fromstring = XML
13340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
13360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Parses an XML document from a sequence of string fragments.
13370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
13380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param sequence A list or other sequence containing XML data fragments.
13390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parser An optional parser instance.  If not given, the
13400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     standard {@link XMLParser} parser is used.
13410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @return An Element instance.
13420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @defreturn Element
13430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @since 1.3
13440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def fromstringlist(sequence, parser=None):
    """Feed every fragment of *sequence* to a parser, in order, and return
    the result of the parser's close().

    A falsy *parser* is replaced by XMLParser(target=TreeBuilder()).
    """
    builder = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        builder.feed(fragment)
    return builder.close()
13510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# --------------------------------------------------------------------
13530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
13550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Generic element structure builder.  This builder converts a sequence
13560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
13570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# #TreeBuilder.end} method calls to a well-formed element structure.
13580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p>
13590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# You can use this class to build an element structure using a custom XML
13600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# parser, or a parser for some other XML-like format.
13610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
13620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param element_factory Optional element factory.  This factory
13630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#    is called to create new Element instances, as necessary.
13640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class TreeBuilder(object):
    """Builds an element structure from start() / data() / end() calls.

    Elements are created by calling the element factory with
    (tag, attrs); when no factory is given, Element is used.
    """

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = []    # character data collected since the last flush
        self._elem = []    # stack of currently open elements
        self._last = None  # element most recently opened or closed
        self._tail = None  # true if we're after an end tag

    ##
    # Returns the toplevel document element.  Asserts that every opened
    # element has been closed and that at least one element was seen.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        root = self._last
        assert root is not None, "missing toplevel element"
        return root

    def _flush(self):
        # Attach the collected character data to the last element: as its
        # tail when we are after an end tag, as its text otherwise.  Data
        # collected before any element exists is dropped.
        if not self._data:
            return
        owner = self._last
        if owner is not None:
            joined = "".join(self._data)
            if self._tail:
                assert owner.tail is None, "internal error (tail)"
                owner.tail = joined
            else:
                assert owner.text is None, "internal error (text)"
                owner.text = joined
        self._data = []

    ##
    # Collects a piece of character data for the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element and makes it the current one.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        open_elements = self._elem
        if open_elements:
            # nest the new element under the innermost open one
            open_elements[-1].append(elem)
        open_elements.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.  Asserts that its tag equals the
    # given tag.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        finished = self._last = self._elem.pop()
        assert finished.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   finished.tag, tag)
        self._tail = 1
        return finished
14410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##
14430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Element structure builder for XML source data, based on the
14440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <b>expat</b> parser.
14450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
14460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @keyparam target Target object.  If omitted, the builder uses an
14470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     instance of the standard {@link #TreeBuilder} class.
14480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @keyparam html Predefine HTML entities.  This flag is not supported
14490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     by the current implementation.
14500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @keyparam encoding Optional encoding.  If given, the value overrides
14510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#     the encoding specified in the XML file.
14520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @see #ElementTree
14530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @see #TreeBuilder
14540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14550a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass XMLParser(object):
14560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    def __init__(self, html=0, target=None, encoding=None):
        """Create the expat parser and route its callbacks to this object.

        html: accepted, but not referenced anywhere in this constructor.
        target: object that receives the build calls; a new TreeBuilder()
            is created when it is None.
        encoding: passed as the first argument to expat.ParserCreate.

        Raises ImportError when neither xml.parsers.expat nor pyexpat can
        be imported.
        """
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is passed as the second ParserCreate argument; _fixname later
        # prepends "{" to any name that contains "}".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        # (if either attribute assignment raises AttributeError, the
        # StartElementHandler stays bound to self._start as set above)
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        self.entity = {}
        # self.version is only set when expat exposes version_info;
        # otherwise the attribute is not created at all.
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown
15010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _raiseerror(self, value):
15030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        err = ParseError(value)
15040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        err.code = value.code
15050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        err.position = value.lineno, value.offset
15060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise err
15070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _fixtext(self, text):
15090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # convert text string to ascii, if possible
15100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return text.encode("ascii")
15120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except UnicodeError:
15130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return text
15140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _fixname(self, key):
15160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # expand qname, and convert name string to ascii, if possible
15170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            name = self._names[key]
15190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except KeyError:
15200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            name = key
15210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if "}" in name:
15220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                name = "{" + name
15230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._names[key] = name = self._fixtext(name)
15240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return name
15250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _start(self, tag, attrib_in):
15270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fixname = self._fixname
15280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fixtext = self._fixtext
15290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tag = fixname(tag)
15300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        attrib = {}
15310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for key, value in attrib_in.items():
15320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            attrib[fixname(key)] = fixtext(value)
15330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.target.start(tag, attrib)
15340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _start_list(self, tag, attrib_in):
15360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fixname = self._fixname
15370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fixtext = self._fixtext
15380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tag = fixname(tag)
15390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        attrib = {}
15400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if attrib_in:
15410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for i in range(0, len(attrib_in), 2):
15420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
15430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.target.start(tag, attrib)
15440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _data(self, text):
15460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.target.data(self._fixtext(text))
15470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _end(self, tag):
15490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.target.end(self._fixname(tag))
15500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _comment(self, data):
15520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            comment = self.target.comment
15540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except AttributeError:
15550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
15560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
15570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return comment(self._fixtext(data))
15580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _pi(self, target, data):
15600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pi = self.target.pi
15620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except AttributeError:
15630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
15640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
15650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return pi(self._fixtext(target), self._fixtext(data))
15660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _default(self, text):
15680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        prefix = text[:1]
15690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if prefix == "&":
15700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # deal with undefined entities
15710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
15720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.target.data(self.entity[text[1:-1]])
15730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except KeyError:
15740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                from xml.parsers import expat
15750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                err = expat.error(
15760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    "undefined entity %s: line %d, column %d" %
15770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    (text, self._parser.ErrorLineNumber,
15780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._parser.ErrorColumnNumber)
15790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    )
15800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
15810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                err.lineno = self._parser.ErrorLineNumber
15820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                err.offset = self._parser.ErrorColumnNumber
15830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise err
15840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif prefix == "<" and text[:9] == "<!DOCTYPE":
15850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._doctype = [] # inside a doctype declaration
15860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif self._doctype is not None:
15870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # parse doctype contents
15880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if prefix == ">":
15890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._doctype = None
15900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
15910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            text = text.strip()
15920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not text:
15930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
15940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._doctype.append(text)
15950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            n = len(self._doctype)
15960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if n > 2:
15970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                type = self._doctype[1]
15980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if type == "PUBLIC" and n == 4:
15990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    name, type, pubid, system = self._doctype
16000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                elif type == "SYSTEM" and n == 3:
16010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    name, type, system = self._doctype
16020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    pubid = None
16030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
16040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return
16050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if pubid:
16060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    pubid = pubid[1:-1]
16070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if hasattr(self.target, "doctype"):
16080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.target.doctype(name, pubid, system[1:-1])
16090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                elif self.doctype is not self._XMLParser__doctype:
16100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    # warn about deprecated call
16110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._XMLParser__doctype(name, pubid, system[1:-1])
16120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.doctype(name, pubid, system[1:-1])
16130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._doctype = None
16140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
16160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # (Deprecated) Handles a doctype declaration.
16170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
16180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param name Doctype name.
16190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param pubid Public identifier.
16200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param system System identifier.
16210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def doctype(self, name, pubid, system):
16230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """This method of XMLParser is deprecated."""
16240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        warnings.warn(
16250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "This method of XMLParser is deprecated.  Define doctype() "
16260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "method on the TreeBuilder target.",
16270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            DeprecationWarning,
16280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
16290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    # sentinel, if doctype is redefined in a subclass: keeps a private
    # reference to the implementation defined above, so the default handler
    # can compare it against self.doctype.  Because of name mangling it is
    # reached as self._XMLParser__doctype.
    __doctype = doctype
16320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
16340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Feeds data to the parser.
16350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
16360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @param data Encoded data.
16370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def feed(self, data):
16390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
16400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.Parse(data, 0)
16410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except self._error, v:
16420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._raiseerror(v)
16430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ##
16450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Finishes feeding data to the parser.
16460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
16470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @return An element structure.
16480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # @defreturn Element
16490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
16510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
16520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._parser.Parse("", 1) # end of data
16530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except self._error, v:
16540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._raiseerror(v)
16550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tree = self.target.close()
16560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        del self.target, self._parser # get rid of circular references
16570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return tree
16580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# compatibility: XMLTreeBuilder is an alias for XMLParser
XMLTreeBuilder = XMLParser

# workaround circular import.  The C14N serializer is imported only after
# the definitions above, and registered under the "c14n" key of _serialize.
# If the ElementC14N module cannot be imported, the "c14n" entry is simply
# not registered.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass
1668