#
# ElementTree
# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
#
# light-weight XML support for Python 2.3 and later.
#
# history (since 1.2.6):
# 2005-11-12 fl   added tostringlist/fromstringlist helpers
# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
# 2006-07-05 fl   removed support for 2.1 and earlier
# 2007-06-21 fl   added deprecation/future warnings
# 2007-08-25 fl   added doctype hook, added parser version attribute etc
# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
# 2007-08-27 fl   warn for broken /tag searches on tree level
# 2007-09-02 fl   added html/text methods to serializer (experimental)
# 2007-09-05 fl   added method argument to tostring/tostringlist
# 2007-09-06 fl   improved error handling
# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
# 2007-12-15 fl   added C14N hooks, copy method (experimental)
#
# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
# Names exported by "from <module> import *".
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser", "XMLTreeBuilder",
    ]

VERSION = "1.3.0"

##
# The <b>Element</b> type is a flexible container object, designed to
# store hierarchical data structures in memory. The type can be
# described as a cross between a list and a dictionary.
# <p>
# Each element has a number of properties associated with it:
# <ul>
# <li>a <i>tag</i>. This is a string identifying what kind of data
# this element represents (the element type, in other words).</li>
# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
# <li>a <i>text</i> string.</li>
# <li>an optional <i>tail</i> string.</li>
# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
# </ul>
#
# To create an element instance, use the {@link #Element} constructor
# or the {@link #SubElement} factory function.
# <p>
# The {@link #ElementTree} class can be used to wrap an element
# structure, and convert it from and to XML.
##

import sys
import re
import warnings


##
# Minimal stand-in for the ElementPath module. It is installed under
# the name <b>ElementPath</b> when the real module cannot be imported,
# and only understands plain tag names plus the ".//tag" form.

class _SimpleElementPath(object):
    # emulate pre-1.2 find/findtext/findall behaviour

    # Returns the first direct child whose tag equals the given tag,
    # or None. The namespaces argument is accepted but not used.
    def find(self, element, tag, namespaces=None):
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    # Returns the text of the first matching direct child, the default
    # value if there is no match, or "" if the match has no text.
    def findtext(self, element, tag, default=None, namespaces=None):
        elem = self.find(element, tag, namespaces)
        if elem is None:
            return default
        return elem.text or ""

    # Generates matching elements. A ".//" prefix delegates to
    # element.iter() with the prefix stripped; anything else is
    # compared against the tags of the direct children.
    def iterfind(self, element, tag, namespaces=None):
        if tag[:3] == ".//":
            for elem in element.iter(tag[3:]):
                yield elem
            # stop here, so the direct children are not scanned a second
            # time against the raw ".//..." path string
            return
        for elem in element:
            if elem.tag == tag:
                yield elem

    # Same as iterfind, but returns the matches as a list.
    def findall(self, element, tag, namespaces=None):
        return list(self.iterfind(element, tag, namespaces))

try:
    from . import ElementPath
except ImportError:
    # fall back to the simple matcher defined above
    ElementPath = _SimpleElementPath()

##
# Parser error. This is a subclass of <b>SyntaxError</b>.
# <p>
# In addition to the exception value, an exception instance contains a
# specific exception code in the <b>code</b> attribute, and the line and
# column of the error in the <b>position</b> attribute.

class ParseError(SyntaxError):
    pass

# --------------------------------------------------------------------

##
# Checks if an object appears to be a valid element object.
#
# @param An element instance.
# @return A true value if this is an element object.
# @defreturn flag

def iselement(element):
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    # Element defines "tag" as a class attribute, so every Element
    # instance passes this test and a separate isinstance check adds
    # nothing.
    return hasattr(element, "tag")

##
# Element class. This class defines the Element interface, and
# provides a reference implementation of this interface.
# <p>
# The element name, attribute names, and attribute values can be
# either ASCII strings (ordinary Python strings containing only 7-bit
# ASCII characters) or Unicode strings.
#
# @param tag The element name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @see Element
# @see SubElement
# @see Comment
# @see ProcessingInstruction

class Element(object):
    # <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary. Where possible, use
    # {@link #Element.get},
    # {@link #Element.set},
    # {@link #Element.keys}, and
    # {@link #Element.items} to access
    # element attributes.

    attrib = None

    ##
    # (Attribute) Text before first subelement. This is either a
    # string or the value None. Note that if there was no text, this
    # attribute may be either None or an empty string, depending on
    # the parser.

    text = None

    ##
    # (Attribute) Text after this element's end tag, but before the
    # next sibling element's start tag. This is either a string or
    # the value None. Note that if there was no text, this attribute
    # may be either None or an empty string, depending on the parser.

    tail = None # text after end tag, if any

    # constructor

    def __init__(self, tag, attrib={}, **extra):
        # work on a copy, so neither the caller's dictionary nor the
        # shared default value is modified
        attrib = attrib.copy()
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def __repr__(self):
        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        return self.__class__(tag, attrib)

    ##
    # (Experimental) Copies the current element. This creates a
    # shallow copy; subelements will be shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        # slice assignment fills the copy with the same child objects
        elem[:] = self
        return elem

    ##
    # Returns the number of subelements. Note that this only counts
    # full elements; to check if there's any content in an element, you
    # have to check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    # Python 2 truth-value hook; warns, then reports whether the
    # element has any subelements.
    def __nonzero__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces the given subelement, by index.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        # if isinstance(index, slice):
        #     for elt in element:
        #         assert iselement(elt)
        # else:
        #     assert iselement(element)
        self._children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Adds a subelement to the end of this element. In document order,
    # the new element will appear after the last existing subelement (or
    # directly after the text, if it's the first subelement), but before
    # the end tag for this element.
    #
    # @param element The element to add.

    def append(self, element):
        # assert iselement(element)
        self._children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        # for element in elements:
        #     assert iselement(element)
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.

    def insert(self, index, element):
        # assert iselement(element)
        self._children.insert(index, element)

    ##
    # Removes a matching subelement. Unlike the <b>find</b> methods,
    # this method compares elements based on identity, not on tag
    # value or contents. To remove subelements by other means, the
    # easiest way is often to use a list comprehension to select what
    # elements to keep, and use slice assignment to update the parent
    # element.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        # assert iselement(element)
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements. The elements are returned
    # in document order.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value no element was found. Note that if the element
    #     is found, but has no text content, this method returns an
    #     empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #     in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #     in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element. This function removes all subelements, clears
    # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
    # to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    ##
    # Gets an element attribute. Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets a list of attribute names. The names are returned in an
    # arbitrary order (just like for an ordinary Python dictionary).
    # Equivalent to <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets element attributes, as a sequence. The attributes are
    # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn list of (string, string) tuples 4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def items(self): 4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.attrib.items() 4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Creates a tree iterator. The iterator loops over this element 4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and all subelements, in document order, and returns all elements 4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # with a matching tag. 4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # <p> 4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If the tree structure is modified during iteration, new or removed 4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # elements may or may not be included. To get a stable set, use the 4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # list() function on the iterator, and loop over the resulting list. 4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param tag What tags to look for (default is to return all elements). 4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return An iterator containing all the matching elements. 
4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn iterator 4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def iter(self, tag=None): 4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tag == "*": 4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tag = None 4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tag is None or self.tag == tag: 4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao yield self 4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for e in self._children: 4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for e in e.iter(tag): 4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao yield e 4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # compatibility 4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getiterator(self, tag=None): 4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Change for a DeprecationWarning in 1.4 4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn( 4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "This method will be removed in future versions. " 4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Use 'elem.iter()' or 'list(elem.iter())' instead.", 4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao PendingDeprecationWarning, stacklevel=2 4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return list(self.iter(tag)) 4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Creates a text iterator. The iterator loops over this element 4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and all subelements, in document order, and returns all inner 4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # text. 
4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return An iterator containing all inner text. 4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn iterator 4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def itertext(self): 4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tag = self.tag 5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(tag, basestring) and tag is not None: 5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.text: 5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao yield self.text 5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for e in self: 5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for s in e.itertext(): 5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao yield s 5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if e.tail: 5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao yield e.tail 5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# compatibility 5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_Element = _ElementInterface = Element 5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao## 5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Subelement factory. This function creates an element instance, and 5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# appends it to an existing element. 5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# <p> 5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The element name, attribute names, and attribute values can be 5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# either 8-bit ASCII strings or Unicode strings. 5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# 5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# @param parent The parent element. 
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def SubElement(parent, tag, attrib={}, **extra):
    # work on a copy, so that neither the caller's dictionary nor the
    # shared default value is ever modified
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child

##
# Comment element factory.  This factory function creates a special
# element that will be serialized as an XML comment by the standard
# serializer.
# <p>
# The comment string can be either an 8-bit ASCII string or a Unicode
# string.
#
# @param text A string containing the comment string.
# @return An element instance, representing a comment.
# @defreturn Element

def Comment(text=None):
    # the factory function itself is used as the element's tag
    node = Element(Comment)
    node.text = text
    return node

##
# PI element factory.  This factory function creates a special element
# that will be serialized as an XML processing instruction by the standard
# serializer.
#
# @param target A string containing the PI target.
# @param text A string containing the PI contents, if any.
# @return An element instance, representing a PI.
# @defreturn Element

def ProcessingInstruction(target, text=None):
    # the factory function itself is used as the element's tag
    node = Element(ProcessingInstruction)
    # the element text is the target, followed by the contents (if any)
    contents = target
    if text:
        contents = target + " " + text
    node.text = contents
    return node

PI = ProcessingInstruction

##
# QName wrapper.  This can be used to wrap a QName attribute value, in
# order to get proper namespace handling on output.
#
# @param text A string containing the QName value, in the form {uri}local,
#     or, if the tag argument is given, the URI part of a QName.
# @param tag Optional tag.  If given, the first argument is interpreted as
#     an URI, and this argument is interpreted as a local name.
# @return An opaque object, representing the QName.

class QName(object):
    # The wrapped name is kept in the "text" attribute, in {uri}local
    # form when a tag was given.  Hashing, equality and ordering are all
    # based on that string, so a QName compares equal to (and hashes
    # like) the plain string it wraps.

    def __init__(self, text_or_uri, tag=None):
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    # kept for callers that invoke the three-way comparison directly
    def __cmp__(self, other):
        if isinstance(other, QName):
            return cmp(self.text, other.text)
        return cmp(self.text, other)

    # Rich comparisons.  Interpreters that ignore __cmp__ would otherwise
    # fall back to identity-based equality, which is inconsistent with
    # the text-based __hash__ above.

    def __eq__(self, other):
        if isinstance(other, QName):
            return self.text == other.text
        return self.text == other

    def __ne__(self, other):
        if isinstance(other, QName):
            return self.text != other.text
        return self.text != other

    def __lt__(self, other):
        if isinstance(other, QName):
            return self.text < other.text
        return self.text < other

    def __le__(self, other):
        if isinstance(other, QName):
            return self.text <= other.text
        return self.text <= other

    def __gt__(self, other):
        if isinstance(other, QName):
            return self.text > other.text
        return self.text > other

    def __ge__(self, other):
        if isinstance(other, QName):
            return self.text >= other.text
        return self.text >= other

# --------------------------------------------------------------------

##
# ElementTree wrapper class.  This class represents an entire element
# hierarchy, and adds some extra support for serialization to and from
# standard XML.
#
# @param element Optional root element.
# @keyparam file Optional file handle or file name.  If given, the
#     tree is initialized with the contents of this XML file.

class ElementTree(object):

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element # first node
        if not file:
            return
        # a file was given; load the tree from it
        self.parse(file)

    ##
    # Returns the root element of this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        root = self._root
        return root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.
6280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _setroot(self, element): 6300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert iselement(element) 6310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._root = element 6320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 6340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Loads an external XML document into this element tree. 6350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 6360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param source A file name or file object. If a file object is 6370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # given, it only has to implement a <b>read(n)</b> method. 6380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam parser An optional parser instance. If not given, the 6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # standard {@link XMLParser} parser is used. 6400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return The document root element. 6410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn Element 6420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @exception ParseError If the parser fails to parse the document. 
6430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def parse(self, source, parser=None): 6450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao close_source = False 6460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not hasattr(source, "read"): 6470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao source = open(source, "rb") 6480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao close_source = True 6490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 6500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not parser: 6510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser = XMLParser(target=TreeBuilder()) 6520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while 1: 6530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = source.read(65536) 6540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not data: 6550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 6560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parser.feed(data) 6570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._root = parser.close() 6580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._root 6590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 6600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if close_source: 6610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao source.close() 6620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 6640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Creates a tree iterator for the root element. The iterator loops 6650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # over all elements in this tree, in document order. 6660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 6670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param tag What tags to look for (default is to return all elements) 6680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return An iterator. 
6690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn iterator 6700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def iter(self, tag=None): 6720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert self._root is not None 6730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._root.iter(tag) 6740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # compatibility 6760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getiterator(self, tag=None): 6770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Change for a DeprecationWarning in 1.4 6780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn( 6790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "This method will be removed in future versions. " 6800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Use 'tree.iter()' or 'list(tree.iter())' instead.", 6810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao PendingDeprecationWarning, stacklevel=2 6820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 6830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return list(self.iter(tag)) 6840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 6860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Same as getroot().find(path), starting at the root of the 6870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # tree. 6880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 6890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param path What element to look for. 6900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam namespaces Optional namespace prefix map. 6910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return The first matching element, or None if no element was found. 
6920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn Element or None 6930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def find(self, path, namespaces=None): 6950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert self._root is not None 6960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path[:1] == "/": 6970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = "." + path 6980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn( 6990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "This search is broken in 1.3 and earlier, and will be " 7000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "fixed in a future version. If you rely on the current " 7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "behaviour, change it to %r" % path, 7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao FutureWarning, stacklevel=2 7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._root.find(path, namespaces) 7050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 7070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Same as getroot().findtext(path), starting at the root of the tree. 7080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 7090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param path What element to look for. 7100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param default What to return if the element was not found. 7110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam namespaces Optional namespace prefix map. 7120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return The text content of the first matching element, or the 7130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # default value no element was found. 
Note that if the element 7140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # is found, but has no text content, this method returns an 7150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # empty string. 7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn string 7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def findtext(self, path, default=None, namespaces=None): 7190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert self._root is not None 7200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path[:1] == "/": 7210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = "." + path 7220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn( 7230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "This search is broken in 1.3 and earlier, and will be " 7240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "fixed in a future version. If you rely on the current " 7250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "behaviour, change it to %r" % path, 7260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao FutureWarning, stacklevel=2 7270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 7280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._root.findtext(path, default, namespaces) 7290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 7310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Same as getroot().findall(path), starting at the root of the tree. 7320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 7330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param path What element to look for. 7340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam namespaces Optional namespace prefix map. 7350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return A list or iterator containing all matching elements, 7360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # in document order. 
7370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn list of Element instances 7380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def findall(self, path, namespaces=None): 7400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert self._root is not None 7410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path[:1] == "/": 7420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = "." + path 7430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn( 7440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "This search is broken in 1.3 and earlier, and will be " 7450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "fixed in a future version. If you rely on the current " 7460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "behaviour, change it to %r" % path, 7470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao FutureWarning, stacklevel=2 7480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 7490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._root.findall(path, namespaces) 7500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 7520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Finds all matching subelements, by tag name or path. 7530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Same as getroot().iterfind(path). 7540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 7550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param path What element to look for. 7560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam namespaces Optional namespace prefix map. 7570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @return An iterator or sequence containing all matching elements, 7580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # in document order. 
7590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @defreturn a generated sequence of Element instances 7600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def iterfind(self, path, namespaces=None): 7620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert self._root is not None 7630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path[:1] == "/": 7640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = "." + path 7650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn( 7660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "This search is broken in 1.3 and earlier, and will be " 7670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "fixed in a future version. If you rely on the current " 7680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "behaviour, change it to %r" % path, 7690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao FutureWarning, stacklevel=2 7700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 7710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._root.iterfind(path, namespaces) 7720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ## 7740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Writes the element tree to a file, as XML. 7750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 7760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @def write(file, **options) 7770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param file A file name, or a file object opened for writing. 7780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @param **options Options, given as keyword arguments. 7790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam encoding Optional output encoding (default is US-ASCII). 7800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam xml_declaration Controls if an XML declaration should 7810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # be added to the file. 
Use False for never, True for always, 7820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # None for only if not US-ASCII or UTF-8. None is default. 7830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam default_namespace Sets the default XML namespace (for "xmlns"). 7840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # @keyparam method Optional output method ("xml", "html", "text" or 7850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "c14n"; default is "xml"). 7860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def write(self, file_or_filename, 7880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # keyword arguments 7890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding=None, 7900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao xml_declaration=None, 7910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao default_namespace=None, 7920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao method=None): 7930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # assert self._root is not None 7940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not method: 7950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao method = "xml" 7960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif method not in _serialize: 7970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # FIXME: raise an ImportError for c14n if ElementC14N is missing? 
7980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("unknown method %r" % method) 7990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(file_or_filename, "write"): 8000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file = file_or_filename 8010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 8020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file = open(file_or_filename, "wb") 8030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao write = file.write 8040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not encoding: 8050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if method == "c14n": 8060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding = "utf-8" 8070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 8080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding = "us-ascii" 8090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif xml_declaration or (xml_declaration is None and 8100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding not in ("utf-8", "us-ascii")): 8110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if method == "xml": 8120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao write("<?xml version='1.0' encoding='%s'?>\n" % encoding) 8130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if method == "text": 8140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _serialize_text(write, self._root, encoding) 8150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 8160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao qnames, namespaces = _namespaces( 8170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._root, encoding, default_namespace 8180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 8190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao serialize = _serialize[method] 8200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao serialize(write, self._root, encoding, qnames, namespaces) 8210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file_or_filename is not file: 8220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file.close() 
    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        # (simply forwards to write() with method="c14n")
        return self.write(file, method="c14n")

# --------------------------------------------------------------------
# serialization support

# Builds the name tables used by the serializers.  Returns a
# (qnames, namespaces) tuple: qnames maps each tag/attribute name (and
# each QName used as attribute value or text) to its encoded serialized
# form, namespaces maps namespace uri:s to prefixes.
def _namespaces(elem, encoding, default_namespace=None):
    # identify namespaces used in this tree

    # maps qnames to *encoded* prefix:local names
    # (None is mapped to None, so elements whose tag is None can be
    # looked up by the serializers as well)
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        # the default namespace is stored with an empty prefix
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                # "{uri}local" form
                uri, tag = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # not seen in this tree yet: try the global registry,
                    # then fall back to a generated "nsN" prefix
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        # the "xml" prefix is never added to the table
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for elem in iterate():
        tag = elem.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            # anything else cannot be used as a tag
            _raise_serialization_error(tag)
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces

# Serializes elem (and, recursively, its children) as XML by passing
# encoded fragments to write.  The namespaces table is only passed for
# the element the caller starts with; the recursive calls pass None, so
# the xmlns declarations are written on that element only.
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag: write only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_xml(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        # QName values are written in prefix:local form
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v, encoding)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for e in elem:
                    _serialize_xml(write, e, encoding, qnames, None)
                write("</" + tag + ">")
            else:
                # no text and no children: use the short form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))

# tags that get no end tag in the html serializer
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

try:
    # use a set where the builtin exists; otherwise keep the tuple
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass

# Serializes elem (and, recursively, its children) as HTML.  Unlike the
# XML serializer, the start tag is always closed with ">", the end tag
# is written with the lower-cased tag name and left out for tags listed
# in HTML_EMPTY, and the text of script/style elements is not escaped.
def _serialize_html(write, elem, encoding, qnames, namespaces):
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag: write only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            tag = tag.lower()
            if text:
                if tag == "script" or tag == "style":
                    # encoded, but not escaped
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if tag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))

# Writes the encoded text content of elem (as produced by itertext),
# followed by the tail of elem, if any.  No markup is written.
def _serialize_text(write, elem, encoding):
    for part in elem.itertext():
        write(part.encode(encoding))
    if elem.tail:
        write(elem.tail.encode(encoding))

# maps output method names to serializer functions.  note that the
# "text" serializer takes fewer arguments than the other two.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}

##
# Registers a namespace prefix.
# The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
#
# @param prefix Namespace prefix.
# @param uri Namespace uri.  Tags and attributes in this namespace
#     will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix is reserved, or is otherwise
#     invalid.

def register_namespace(prefix, uri):
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # iterate over a snapshot, since entries are deleted inside the loop
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}

# Raises a TypeError describing a value that cannot be serialized.
def _raise_serialization_error(text):
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
        )

# Encodes text without escaping it.  Characters that cannot be
# represented in the given encoding are written as character references.
def _encode(text, encoding):
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

# Escapes "&", "<" and ">" in character data, and encodes the result.
def _escape_cdata(text, encoding):
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        # ("&" must be replaced first, so that the ampersands of the
        # entities added below are left alone)
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

# Escapes an attribute value for output inside double quotes: the
# markup characters, the double quote, and newlines (which are written
# as a character reference).
def _escape_attrib(text, encoding):
    # escape attribute value
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

# Same as _escape_attrib, for the html serializer: "<" and newlines are
# left as is.
def _escape_attrib_html(text, encoding):
    # escape attribute value
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

# --------------------------------------------------------------------

##
# Generates a string representation of an XML element, including all
# subelements.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return An encoded string containing the XML data.
# @defreturn string

def tostring(element, encoding=None, method=None):
    # the serializer only needs an object with a write method; let it
    # append each fragment to a list, and join the list afterwards
    fragments = []

    class _collector:
        write = staticmethod(fragments.append)

    ElementTree(element).write(_collector(), encoding, method=method)
    return "".join(fragments)

##
# Generates a string representation of an XML element, including all
# subelements.  The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3

def tostringlist(element, encoding=None, method=None):
    # collect the fragments in the order the serializer writes them
    fragments = []

    class _collector:
        write = staticmethod(fragments.append)

    ElementTree(element).write(_collector(), encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments

##
# Writes an element tree or element structure to sys.stdout.  This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.  In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    # debugging
    # wrap plain elements, so that both kinds of input can be written
    # the same way
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    # make sure the output ends with a newline
    tail = tree.getroot().tail
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")

# --------------------------------------------------------------------
# parsing

##
# Parses an XML document into an element tree.
#
# @param source A filename or file object containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An ElementTree instance

def parse(source, parser=None):
    # load the document into a fresh tree, and hand the tree (not the
    # value returned by its parse method) back to the caller
    document = ElementTree()
    document.parse(source, parser)
    return document

##
# Parses an XML document into an element tree incrementally, and reports
# what's going on to the user.
#
# @param source A filename or file object containing XML data.
# @param events A list of events to report back.  If omitted, only "end"
#     events are reported.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A (event, elem) iterator.

def iterparse(source, events=None, parser=None):
    close_source = False
    if not hasattr(source, "read"):
        # a file name was passed in; the file opened here is owned by
        # the iterator, which closes it when the input is exhausted
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except BaseException:
        # setup failed (e.g. unknown event name), so no iterator will
        # ever close the file opened above; don't leak it
        if close_source:
            source.close()
        raise

# Iterator returned by iterparse.  Reads the source in chunks, feeds
# them to the parser, and returns the (event, value) tuples that the
# handlers installed by the constructor have collected.  When the input
# is exhausted, the root attribute is set to whatever the parser's
# close method returned.
class _IterParseIterator(object):

    # @param source File object to read from.
    # @param events Event names to report ("start", "end", "start-ns",
    #     "end-ns"), or None for "end" events only.
    # @param parser Parser instance.  The handlers are installed on its
    #     _parser attribute.
    # @param close_source If true, the source is closed when the input
    #     is exhausted.
    # @exception ValueError If an event name is unknown.
    def __init__(self, source, events, parser, close_source=False):
        self._file = source
        self._close_file = close_source
        self._events = []
        self._index = 0
        self._error = None
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            # note: the handlers get event/append (and the parser
            # callback) as default arguments, so each handler keeps the
            # values of the loop iteration that created it
            if event == "start":
                try:
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        # keep the uri as is if it's not plain ascii
                        pass
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event)

    # Returns the next (event, value) tuple.  A SyntaxError raised by
    # the parser is held back until the events collected before it have
    # been returned.
    def next(self):
        while 1:
            try:
                item = self._events[self._index]
                self._index += 1
                return item
            except IndexError:
                pass
            # the event buffer is empty
            if self._error:
                e = self._error
                self._error = None
                raise e
            if self._parser is None:
                # the parser has been closed; we're done
                self.root = self._root
                if self._close_file:
                    self._file.close()
                raise StopIteration
            # load event buffer
            del self._events[:]
            self._index = 0
            data = self._file.read(16384)
            if data:
                try:
                    self._parser.feed(data)
                except SyntaxError as exc:
                    self._error = exc
            else:
                # end of input
                self._root = self._parser.close()
                self._parser = None

    # the iterator protocol uses the name next in Python 2, and __next__
    # in Python 3; support both
    __next__ = next

    def __iter__(self):
        return self

##
# Parses an XML document from a string constant.  This function can
# be used to embed "XML literals" in Python code.
#
# @param source A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element

def XML(text, parser=None):
    # A missing (or falsy) parser selects the standard expat-based one.
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    return builder.close()

##
# Parses an XML document from a string constant, and additionally
# returns a dictionary mapping element id:s to the elements carrying
# them.  Elements without an "id" attribute, or with an empty one, are
# left out of the dictionary.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)

def XMLID(text, parser=None):
    builder = parser if parser else XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        if not key:
            continue  # nothing usable to index this element by
        by_id[key] = node
    return root, by_id

##
# Parses an XML document from a string constant.  Alias for
# {@link #XML}.
#
# @def fromstring(text)
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element

fromstring = XML

##
# Parses an XML document from a sequence of string fragments.
#
# @param sequence A list or other sequence containing XML data fragments.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element
# @since 1.3

def fromstringlist(sequence, parser=None):
    # A missing (or falsy) parser selects the standard expat-based one.
    builder = parser or XMLParser(target=TreeBuilder())
    # Fragments are fed in order; the parser does the stitching.
    for fragment in sequence:
        builder.feed(fragment)
    return builder.close()

# --------------------------------------------------------------------

##
# Generic element structure builder.  This builder converts a sequence
# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
# #TreeBuilder.end} method calls to a well-formed element structure.
# <p>
# You can use this class to build an element structure using a custom XML
# parser, or a parser for some other XML-like format.
#
# @param element_factory Optional element factory.  This factory
#    is called to create new Element instances, as necessary.

class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._factory = Element if element_factory is None else element_factory
        self._elem = []    # stack of currently open elements
        self._data = []    # text fragments not yet attached to an element
        self._last = None  # element most recently opened or closed
        self._tail = None  # true once the last event was an end tag

    ##
    # Returns the toplevel document element.  All elements must have
    # been closed, and at least one element must have been seen.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        root = self._last
        assert root is not None, "missing toplevel element"
        return root

    ##
    # Attaches the collected text fragments to the last element: as its
    # tail if that element has already been closed, as its text
    # otherwise.  Text collected before any element was seen is dropped.

    def _flush(self):
        if not self._data:
            return
        owner = self._last
        if owner is not None:
            joined = "".join(self._data)
            if self._tail:
                assert owner.tail is None, "internal error (tail)"
                owner.tail = joined
            else:
                assert owner.text is None, "internal error (text)"
                owner.text = joined
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        node = self._factory(tag, attrs)
        self._last = node
        stack = self._elem
        if stack:
            # nest the new element inside the innermost open one
            stack[-1].append(node)
        stack.append(node)
        self._tail = 0
        return node

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (closed.tag, tag)
        self._tail = 1
        return closed

##
# Element structure builder for XML source data, based on the
# <b>expat</b> parser.
#
# @keyparam target Target object.  If omitted, the builder uses an
#     instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities.  This flag is not supported
#     by the current implementation.
# @keyparam encoding Optional encoding.  If given, the value overrides
#     the encoding specified in the XML file.
# @see #ElementTree
# @see #TreeBuilder

class XMLParser(object):

    def __init__(self, html=0, target=None, encoding=None):
        # NOTE: the html argument is accepted but not used here.
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace URI and local name in the names that
        # expat reports; _fixname adds the leading "{".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None  # list of doctype tokens while inside one
        self.entity = {}      # user-supplied entity name -> replacement text
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    ##
    # (Internal) Re-raises an expat error as a ParseError that carries
    # the error code and the (line, column) position.

    def _raiseerror(self, value):
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    # expat callback: attributes arrive as a dictionary
    def _start(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    # expat callback: attributes arrive as a flat [name, value, ...] list
    def _start_list(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    # comments are forwarded only to targets that define comment()
    def _comment(self, data):
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    # processing instructions are forwarded only to targets defining pi()
    def _pi(self, target, data):
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    ##
    # (Internal) Default expat handler.  Resolves otherwise undefined
    # entities through the <b>entity</b> dictionary, and collects the
    # tokens of a doctype declaration so that the doctype hook can be
    # called with (name, pubid, system).

    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # self._error is the expat error class resolved in
                # __init__ (which also knows the pyexpat fallback), so
                # feed()/close() translate this like any other expat error.
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1]  # strip the surrounding quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # Bound methods must be compared with != here: each
                    # attribute access creates a new method object, so an
                    # identity test would always report "overridden".
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree

# compatibility
XMLTreeBuilder = XMLParser

# workaround circular import.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass