#
# ElementTree
# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
#
# light-weight XML support for Python 2.3 and later.
#
# history (since 1.2.6):
# 2005-11-12 fl   added tostringlist/fromstringlist helpers
# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
# 2006-07-05 fl   removed support for 2.1 and earlier
# 2007-06-21 fl   added deprecation/future warnings
# 2007-08-25 fl   added doctype hook, added parser version attribute etc
# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
# 2007-08-27 fl   warn for broken /tag searches on tree level
# 2007-09-02 fl   added html/text methods to serializer (experimental)
# 2007-09-05 fl   added method argument to tostring/tostringlist
# 2007-09-06 fl   improved error handling
# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
# 2007-12-15 fl   added C14N hooks, copy method (experimental)
#
# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
# Names exported by ``from <this module> import *``.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser", "XMLTreeBuilder",
    ]

# Version string of this module; exported through __all__ above.
VERSION = "1.3.0"

##
# The <b>Element</b> type is a flexible container object, designed to
# store hierarchical data structures in memory.  The type can be
# described as a cross between a list and a dictionary.
# <p>
# Each element has a number of properties associated with it:
# <ul>
# <li>a <i>tag</i>.  This is a string identifying what kind of data
# this element represents (the element type, in other words).</li>
# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
# <li>a <i>text</i> string.</li>
# <li>an optional <i>tail</i> string.</li>
# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
# </ul>
#
# To create an element instance, use the {@link #Element} constructor
# or the {@link #SubElement} factory function.
# <p>
# The {@link #ElementTree} class can be used to wrap an element
# structure, and convert it from and to XML.
##

import sys
import re
import warnings


class _SimpleElementPath(object):
    """Fallback path engine used when the ElementPath module is unavailable.

    A plain tag matches direct children only.  ``iterfind``/``findall``
    additionally accept a ".//tag" pattern, which is delegated to
    ``element.iter()``.  The ``namespaces`` argument is accepted for
    signature compatibility and is not otherwise used.
    """
    # emulate pre-1.2 find/findtext/findall behaviour

    def find(self, element, tag, namespaces=None):
        """Return the first direct child whose tag equals *tag*, or None."""
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        """Return the text of the first direct child tagged *tag*.

        Returns *default* when no child matches, and "" when the matching
        child has no (or empty) text.
        """
        elem = self.find(element, tag, namespaces)
        if elem is None:
            return default
        return elem.text or ""

    def iterfind(self, element, tag, namespaces=None):
        """Yield matching elements for a plain tag or a ".//tag" pattern."""
        if tag[:3] == ".//":
            for elem in element.iter(tag[3:]):
                yield elem
            # A ".//" pattern is fully answered by iter(); do not fall
            # through and compare child tags against the raw pattern.
            return
        for elem in element:
            if elem.tag == tag:
                yield elem

    def findall(self, element, tag, namespaces=None):
        """Return the results of iterfind() as a list."""
        return list(self.iterfind(element, tag, namespaces))

# Prefer the real ElementPath module; fall back to the minimal emulation
# above when it cannot be imported.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()

##
# Parser error.  This is a subclass of <b>SyntaxError</b>.
# <p>
# In addition to the exception value, an exception instance contains a
# specific exception code in the <b>code</b> attribute, and the line and
# column of the error in the <b>position</b> attribute.

class ParseError(SyntaxError):
    pass

# --------------------------------------------------------------------

##
# Checks if an object appears to be a valid element object.
#
# @param element An element instance.
# @return A true value if this is an element object.
# @defreturn flag

def iselement(element):
    """Return True if *element* looks like an element object.

    The check is duck-typed: any object exposing a ``tag`` attribute is
    accepted.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    # Element declares ``tag`` at class level, so every Element instance
    # satisfies this test without a separate isinstance() check.
    return hasattr(element, "tag")

##
# Element class.  This class defines the Element interface, and
# provides a reference implementation of this interface.
# <p>
# The element name, attribute names, and attribute values can be
# either ASCII strings (ordinary Python strings containing only 7-bit
# ASCII characters) or Unicode strings.
#
# @param tag The element name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see Element 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see SubElement 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see Comment 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# @see ProcessingInstruction 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Element(object): 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # <tag attrib>text<child/>...</tag>tail 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (Attribute) Element tag. 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm tag = None 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (Attribute) Element attribute dictionary. Where possible, use 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # {@link #Element.get}, 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # {@link #Element.set}, 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # {@link #Element.keys}, and 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # {@link #Element.items} to access 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # element attributes. 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm attrib = None 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (Attribute) Text before first subelement. This is either a 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # string or the value None. Note that if there was no text, this 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # attribute may be either None or an empty string, depending on 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the parser. 
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm text = None 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (Attribute) Text after this element's end tag, but before the 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # next sibling element's start tag. This is either a string or 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the value None. Note that if there was no text, this attribute 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # may be either None or an empty string, depending on the parser. 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm tail = None # text after end tag, if any 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # constructor 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, tag, attrib={}, **extra): 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm attrib = attrib.copy() 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm attrib.update(extra) 2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.tag = tag 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.attrib = attrib 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._children = [] 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "<Element %s at 0x%x>" % (repr(self.tag), id(self)) 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Creates a new element object of the same type as this element. 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param tag Element tag. 
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param attrib Element attributes, given as a dictionary. 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return A new element instance. 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def makeelement(self, tag, attrib): 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.__class__(tag, attrib) 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (Experimental) Copies the current element. This creates a 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # shallow copy; subelements will be shared with the original tree. 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return A new element instance. 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def copy(self): 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elem = self.makeelement(self.tag, self.attrib) 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elem.text = self.text 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elem.tail = self.tail 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elem[:] = self 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return elem 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Returns the number of subelements. Note that this only counts 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # full elements; to check if there's any content in an element, you 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # have to check both the length and the <b>text</b> attribute. 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return The number of subelements. 
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __len__(self): 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return len(self._children) 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __nonzero__(self): 2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm warnings.warn( 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "The behavior of this method will change in future versions. " 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "Use specific 'len(elem)' or 'elem is not None' test instead.", 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm FutureWarning, stacklevel=2 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return len(self._children) != 0 # emulate old behaviour, for now 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Returns the given subelement, by index. 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param index What subelement to return. 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return The given subelement. 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @exception IndexError If the given element does not exist. 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __getitem__(self, index): 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._children[index] 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Replaces the given subelement, by index. 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param index What subelement to replace. 
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param element The new element value. 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @exception IndexError If the given element does not exist. 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __setitem__(self, index, element): 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # if isinstance(index, slice): 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for elt in element: 2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert iselement(elt) 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # else: 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert iselement(element) 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._children[index] = element 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Deletes the given subelement, by index. 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param index What subelement to delete. 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @exception IndexError If the given element does not exist. 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __delitem__(self, index): 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del self._children[index] 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Adds a subelement to the end of this element. In document order, 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the new element will appear after the last existing subelement (or 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # directly after the text, if it's the first subelement), but before 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the end tag for this element. 
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param element The element to add. 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def append(self, element): 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert iselement(element) 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._children.append(element) 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Appends subelements from a sequence. 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param elements A sequence object with zero or more elements. 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @since 1.3 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def extend(self, elements): 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for element in elements: 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert iselement(element) 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._children.extend(elements) 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Inserts a subelement at the given position in this element. 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param index Where to insert the new subelement. 
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def insert(self, index, element): 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert iselement(element) 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._children.insert(index, element) 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Removes a matching subelement. Unlike the <b>find</b> methods, 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # this method compares elements based on identity, not on tag 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # value or contents. To remove subelements by other means, the 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # easiest way is often to use a list comprehension to select what 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # elements to keep, and use slice assignment to update the parent 3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # element. 3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param element What element to remove. 3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @exception ValueError If a matching element could not be found. 3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def remove(self, element): 3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert iselement(element) 3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._children.remove(element) 3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (Deprecated) Returns all subelements. The elements are returned 3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # in document order. 
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return A list of subelements. 3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @defreturn list of Element instances 3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def getchildren(self): 3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm warnings.warn( 3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "This method will be removed in future versions. " 3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "Use 'list(elem)' or iteration over elem instead.", 3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm DeprecationWarning, stacklevel=2 3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._children 3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Finds the first matching subelement, by tag name or path. 3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param path What element to look for. 3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @keyparam namespaces Optional namespace prefix map. 3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return The first matching element, or None if no element was found. 3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @defreturn Element or None 3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def find(self, path, namespaces=None): 3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ElementPath.find(self, path, namespaces) 3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Finds text for the first matching subelement, by tag name or path. 
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param path What element to look for. 3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param default What to return if the element was not found. 3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @keyparam namespaces Optional namespace prefix map. 3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return The text content of the first matching element, or the 3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # default value no element was found. Note that if the element 3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # is found, but has no text content, this method returns an 3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # empty string. 3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @defreturn string 3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def findtext(self, path, default=None, namespaces=None): 3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ElementPath.findtext(self, path, default, namespaces) 3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Finds all matching subelements, by tag name or path. 3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param path What element to look for. 3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @keyparam namespaces Optional namespace prefix map. 3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return A list or other sequence containing all matching elements, 3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # in document order. 
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @defreturn list of Element instances 3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def findall(self, path, namespaces=None): 3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ElementPath.findall(self, path, namespaces) 3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Finds all matching subelements, by tag name or path. 3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @param path What element to look for. 3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @keyparam namespaces Optional namespace prefix map. 3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @return An iterator or sequence containing all matching elements, 3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # in document order. 3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # @defreturn a generated sequence of Element instances 4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def iterfind(self, path, namespaces=None): 4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ElementPath.iterfind(self, path, namespaces) 4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ## 4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Resets an element. This function removes all subelements, clears 4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # all attributes, and sets the <b>text</b> and <b>tail</b> attributes 4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # to None. 
def clear(self):
    """Empty the attribute dict, drop all children, reset text and tail."""
    # The attribute dictionary is emptied in place, while the child list
    # is replaced by a brand new list object.
    self.attrib.clear()
    self._children = []
    self.text = None
    self.tail = None

##
# Gets an element attribute. Equivalent to <b>attrib.get</b>, but
# some implementations may handle this a bit more efficiently.
#
# @param key What attribute to look for.
# @param default What to return if the attribute was not found.
# @return The attribute value, or the default value, if the
#     attribute was not found.
# @defreturn string or None

def get(self, key, default=None):
    attributes = self.attrib
    return attributes.get(key, default)

##
# Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,
# but some implementations may handle this a bit more efficiently.
#
# @param key What attribute to set.
# @param value The attribute value.
def set(self, key, value):
    """Store *value* under *key* in this element's attribute dict."""
    attributes = self.attrib
    attributes[key] = value

##
# Gets a list of attribute names. The names are returned in an
# arbitrary order (just like for an ordinary Python dictionary).
# Equivalent to <b>attrib.keys()</b>.
#
# @return A list of element attribute names.
# @defreturn list of strings

def keys(self):
    names = self.attrib.keys()
    return names

##
# Gets element attributes, as a sequence. The attributes are
# returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.
#
# @return A list of (name, value) tuples for all attributes.
# @defreturn list of (string, string) tuples

def items(self):
    """Return whatever ``attrib.items()`` yields for this element."""
    pairs = self.attrib.items()
    return pairs

##
# Creates a tree iterator. The iterator loops over this element
# and all subelements, in document order, and returns all elements
# with a matching tag.
# <p>
# If the tree structure is modified during iteration, new or removed
# elements may or may not be included. To get a stable set, use the
# list() function on the iterator, and loop over the resulting list.
#
# @param tag What tags to look for (default is to return all elements).
# @return An iterator containing all the matching elements.
# @defreturn iterator

def iter(self, tag=None):
    """Yield this element and its descendants whose tag matches *tag*."""
    if tag == "*":
        # "*" is just another spelling of "no tag filter"
        tag = None
    matches_self = tag is None or self.tag == tag
    if matches_self:
        yield self
    for child in self._children:
        # each child produces its own matching subtree, depth first
        for descendant in child.iter(tag):
            yield descendant

# compatibility
def getiterator(self, tag=None):
    # Change for a DeprecationWarning in 1.4
    message = (
        "This method will be removed in future versions. "
        "Use 'elem.iter()' or 'list(elem.iter())' instead."
        )
    # stacklevel=2 attributes the warning to the code calling getiterator()
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    return list(self.iter(tag))

##
# Creates a text iterator. The iterator loops over this element
# and all subelements, in document order, and returns all inner
# text.
#
# @return An iterator containing all inner text.
# @defreturn iterator

def itertext(self):
    """Yield the inner text of this element and its subelements in order."""
    tag = self.tag
    # Elements whose tag is neither a string nor None produce no text
    # at all (the iterator ends immediately).
    if tag is not None and not isinstance(tag, basestring):
        return
    if self.text:
        yield self.text
    for child in self:
        for chunk in child.itertext():
            yield chunk
        # the tail text follows the child's own content
        if child.tail:
            yield child.tail

# compatibility
_Element = _ElementInterface = Element

##
# Subelement factory. This function creates an element instance, and
# appends it to an existing element.
# <p>
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
#
# @param parent The parent element.
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def SubElement(parent, tag, attrib={}, **extra):
    """Create a child through ``parent.makeelement`` and append it."""
    # Work on a copy so neither the caller's dictionary nor the shared
    # default dictionary is ever modified.
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child

##
# Comment element factory. This factory function creates a special
# element that will be serialized as an XML comment by the standard
# serializer.
# <p>
# The comment string can be either an 8-bit ASCII string or a Unicode
# string.
#
# @param text A string containing the comment string.
# @return An element instance, representing a comment.
# @defreturn Element

def Comment(text=None):
    """Build an element whose tag is the Comment factory itself."""
    # The factory function object doubles as the tag value of the new
    # element.
    comment = Element(Comment)
    comment.text = text
    return comment

##
# PI element factory. This factory function creates a special element
# that will be serialized as an XML processing instruction by the standard
# serializer.
#
# @param target A string containing the PI target.
# @param text A string containing the PI contents, if any.
# @return An element instance, representing a PI.
# @defreturn Element

def ProcessingInstruction(target, text=None):
    """Build an element whose tag is the ProcessingInstruction factory."""
    pi = Element(ProcessingInstruction)
    # The element text is the target, followed by a single space and the
    # contents when contents were given.
    content = target
    if text:
        content = content + " " + text
    pi.text = content
    return pi

PI = ProcessingInstruction

##
# QName wrapper. This can be used to wrap a QName attribute value, in
# order to get proper namespace handling on output.
#
# @param text A string containing the QName value, in the form {uri}local,
#     or, if the tag argument is given, the URI part of a QName.
# @param tag Optional tag. If given, the first argument is interpreted as
#     an URI, and this argument is interpreted as a local name.
# @return An opaque object, representing the QName.
class QName(object):
    """Wrapper holding a qualified name as a single ``{uri}local`` string.

    text_or_uri -- the complete QName text, or only the URI part when
                   *tag* is given
    tag -- optional local name; when truthy the stored text becomes
           "{text_or_uri}tag"

    Instances hash like their text and compare by their text, both with
    other QName instances and with plain values.
    """

    def __init__(self, text_or_uri, tag=None):
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def _unwrap(self, other):
        # Value to compare self.text against: the wrapped text of another
        # QName, or the other object itself.
        if isinstance(other, QName):
            return other.text
        return other

    # Rich comparisons keep equality consistent with __hash__ on
    # interpreters that never call __cmp__.
    def __eq__(self, other):
        return self.text == self._unwrap(other)

    def __ne__(self, other):
        return self.text != self._unwrap(other)

    def __lt__(self, other):
        return self.text < self._unwrap(other)

    def __le__(self, other):
        return self.text <= self._unwrap(other)

    def __gt__(self, other):
        return self.text > self._unwrap(other)

    def __ge__(self, other):
        return self.text >= self._unwrap(other)

    def __cmp__(self, other):
        # Kept for callers that invoke it directly; written without the
        # cmp() builtin so it does not depend on that name existing.
        theirs = self._unwrap(other)
        return (self.text > theirs) - (self.text < theirs)

# --------------------------------------------------------------------

##
# ElementTree wrapper class. This class represents an entire element
# hierarchy, and adds some extra support for serialization to and from
# standard XML.
#
# @param element Optional root element.
# @keyparam file Optional file handle or file name. If given, the
#     tree is initialized with the contents of this XML file.
class ElementTree(object):
    """Holder for a root element, with parse, search and write helpers.

    element -- optional root element, stored as ``_root``
    file -- optional file name or file object; when truthy it is parsed
            right away and the parsed root replaces *element*

    The search and iteration methods forward to the root element, so
    they need a root to be set before they are called.
    """

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree. This discards the
    # current contents of the tree, and replaces it with the given
    # element. Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        # assert iselement(element)
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    # <p>
    # A file that this method opened itself (because a file name was
    # given) is closed again before returning, also when reading or
    # parsing fails. A file object supplied by the caller is left open.
    #
    # @param source A file name or file object. If a file object is
    #     given, it only has to implement a <b>read(n)</b> method.
    # @keyparam parser An optional parser instance. If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            # feed the parser in 64 KiB chunks until read() returns
            # something false
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # only close what was opened above
            if close_source:
                source.close()

    ##
    # Creates a tree iterator for the root element. The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        # assert self._root is not None
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    # Shared by find, findtext, findall and iterfind: a path starting
    # with "/" is rewritten to start with "./" and a FutureWarning is
    # issued. stacklevel=3 skips this helper and the search method, so
    # the warning is attributed to the code that called the search method.
    def _relative_path(self, path):
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.find(path, namespaces)

    ##
    # Finds the element text for the first toplevel element with given
    # tag. Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found. Note that if the element
    #     is found, but has no text content, this method returns an
    #     empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.findtext(path, default, namespaces)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #     in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.findall(path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Same as getroot().iterfind(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #     in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.iterfind(path, namespaces)

    ##
    # Writes the element tree to a file, as XML.
    # <p>
    # A file that this method opened itself (because a file name was
    # given) is closed again before returning, also when serialization
    # fails. A file object supplied by the caller is left open.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding (default is US-ASCII).
    # @keyparam method Optional output method ("xml", "html", "text" or
    #     "c14n"; default is "xml").
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file. Use False for never, True for always,
    #     None for only if not US-ASCII or UTF-8. None is default.

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        # assert self._root is not None
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            file = file_or_filename
        else:
            file = open(file_or_filename, "wb")
        try:
            write = file.write
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            # NOTE(review): this branch hangs off "if not encoding", so a
            # declaration is only written when the caller passed an
            # encoding explicitly; left as is, callers may rely on it.
            elif xml_declaration or (xml_declaration is None and
                                     encoding not in ("utf-8", "us-ascii")):
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # only close what was opened above
            if file_or_filename is not file:
                file.close()

# NOTE(review): write_c14n is a method of the same class as write() (it
# calls self.write).  The class header lies outside this chunk, so indent
# it under the class when the file layout is restored.
def write_c14n(self, file):
    """Write the tree to file using the "c14n" output method."""
    # lxml.etree compatibility.  use output method instead
    return self.write(file, method="c14n")

# --------------------------------------------------------------------
# serialization support

def _namespaces(elem, encoding, default_namespace=None):
    """Collect the qualified names and namespaces used below elem.

    Returns a (qnames, namespaces) pair: qnames maps every tag,
    attribute name and QName value found in the tree to its encoded
    "prefix:local" (or plain) form, and namespaces maps each namespace
    URI to the prefix chosen for it.

    Raises ValueError if default_namespace is given and the tree
    contains a non-qualified name, and TypeError (via
    _raise_serialization_error) for tags that cannot be serialized.
    """
    # identify namespaces used in this tree

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # prefer a registered prefix, else invent "nsN"
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # "xml" is never added to the table of namespaces
                    # that get declared on the root element
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for elem in iterate():
        tag = elem.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces

def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Write elem, its subtree and its tail as XML through write().

    qnames is the name table built by _namespaces().  namespaces, if
    non-empty, is written as xmlns declarations on this element; the
    recursive calls pass None, so declarations are emitted only on the
    element the serialization started with.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # element without a tag: only its content is written
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_xml(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v, encoding)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for e in elem:
                    _serialize_xml(write, e, encoding, qnames, None)
                write("</" + tag + ">")
            else:
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))

# Lower-case names of the HTML elements that are written without an
# end tag by _serialize_html.  ("meta" and "param" are separate entries;
# a missing comma used to fuse them into the bogus name "metaparam".)
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    # no set builtin available; keep the tuple
    pass

def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Write elem, its subtree and its tail as HTML through write().

    Differs from _serialize_xml in that a start tag is always closed
    with ">", attribute values are escaped with _escape_attrib_html,
    the text of "script" and "style" elements is written without
    entity escaping, and no end tag is written for elements listed in
    HTML_EMPTY.  The tag is lower-cased before those checks, so end
    tags are written in lower case.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # element without a tag: only its content is written
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            tag = tag.lower()
            if text:
                if tag == "script" or tag == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if tag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))

def _serialize_text(write, elem, encoding):
    """Write only the text content of elem (and its tail) through write()."""
    for part in elem.itertext():
        write(part.encode(encoding))
    if elem.tail:
        write(elem.tail.encode(encoding))

# maps output method names to serializer functions
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}

##
# Registers a namespace prefix.  The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
#
# @param prefix Namespace prefix.
# @param uri Namespace uri.  Tags and attributes in this namespace
#     will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix is reserved, or is otherwise
#     invalid.

def register_namespace(prefix, uri):
    """Map uri to prefix in the global _namespace_map registry.

    Any existing entry for either the prefix or the uri is removed
    first.  Raises ValueError for prefixes of the form "ns<digits>",
    which the serializer generates itself.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # iterate over a snapshot: entries are deleted inside the loop
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}

def _raise_serialization_error(text):
    """Raise TypeError naming the value (and its type) that failed."""
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
        )

def _encode(text, encoding):
    """Encode text, writing unencodable characters as character references.

    Raises TypeError (via _raise_serialization_error) if text cannot be
    encoded because it is not a string.
    """
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

def _escape_cdata(text, encoding):
    """Escape "&", "<" and ">" in character data, then encode it."""
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            # must come first, so the "&" of the entities added below
            # is not escaped a second time
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

def _escape_attrib(text, encoding):
    """Escape an XML attribute value for use inside double quotes."""
    # escape attribute value
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            # written as a character reference so the line break is not
            # turned into a space by attribute-value normalization
            text = text.replace("\n", "&#10;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

def _escape_attrib_html(text, encoding):
    """Escape an HTML attribute value ("&", ">" and the double quote)."""
    # escape attribute value
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

# --------------------------------------------------------------------

##
# Generates a string representation of an XML element, including all
# subelements.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return An encoded string containing the XML data.
# @defreturn string

def tostring(element, encoding=None, method=None):
    """Serialize element and its subtree into one encoded string."""
    # tostringlist() collects exactly the fragments the serializer
    # writes; all that is left to do here is to glue them together
    fragments = tostringlist(element, encoding, method)
    return "".join(fragments)

##
# Generates a string representation of an XML element, including all
# subelements.  The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3

def tostringlist(element, encoding=None, method=None):
    """Serialize element and its subtree into a list of encoded fragments."""
    fragments = []

    class dummy:
        pass

    # minimal file-like object: every write() appends one fragment
    sink = dummy()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments

##
# Writes an element tree or element structure to sys.stdout.  This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.  In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    """Write elem to sys.stdout as XML, ending the output with a newline."""
    # debugging
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        tree = ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    # add a newline unless the root's tail already ends with one
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")

# --------------------------------------------------------------------
# parsing

##
# Parses an XML document into an element tree.
#
# @param source A filename or file object containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An ElementTree instance

def parse(source, parser=None):
    """Load source into a new ElementTree and return that tree."""
    document = ElementTree()
    document.parse(source, parser)
    return document

##
# Parses an XML document into an element tree incrementally, and reports
# what's going on to the user.
#
# @param source A filename or file object containing XML data.
# @param events A list of events to report back.  If omitted, only "end"
#     events are reported.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A (event, elem) iterator.

def iterparse(source, events=None, parser=None):
    """Return an iterator of (event, elem) pairs for an XML document.

    source -- an object with a read() method, or a file name.  A file
        opened here from a name is closed again when the iterator is
        exhausted or fails; a file object passed in by the caller is
        never closed.
    events -- sequence of event names ("start", "end", "start-ns",
        "end-ns"); only "end" events are reported if omitted.
    parser -- parser instance; an XMLParser feeding a TreeBuilder is
        created if omitted.

    Raises ValueError for an unknown event name.
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    iterator = None
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        iterator = _IterParseIterator(source, events, parser, close_source)
        return iterator
    finally:
        # setting up the iterator failed: do not leak the file we opened
        if iterator is None and close_source:
            source.close()

class _IterParseIterator(object):
    """Iterator behind iterparse().

    Reads the source in chunks, feeds them to the parser and hands out
    the (event, value) pairs that the parser callbacks queued up.  After
    the iteration has finished, the root attribute holds the value
    returned by the parser's close() method.
    """

    def __init__(self, source, events, parser, close_source=False):
        # close_source: true if this iterator owns the source and must
        # close it when the iteration ends (normally or with an error)
        self._file = source
        self._close_file = close_source
        self._events = []
        self._index = 0
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            if event == "start":
                try:
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    # fall back to the non-list start handler
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        # keep the uri as it is if it is not pure ASCII
                        pass
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        """Return the next (event, value) pair.

        Raises StopIteration once the source is exhausted and all
        queued events have been handed out.
        """
        keep_open = False
        try:
            while 1:
                try:
                    item = self._events[self._index]
                except IndexError:
                    if self._parser is None:
                        self.root = self._root
                        raise StopIteration
                    # load event buffer
                    del self._events[:]
                    self._index = 0
                    data = self._file.read(16384)
                    if data:
                        self._parser.feed(data)
                    else:
                        self._root = self._parser.close()
                        self._parser = None
                else:
                    self._index = self._index + 1
                    keep_open = True
                    return item
        finally:
            # the iteration ended (StopIteration) or failed: release the
            # source if we own it, and make sure that happens only once
            if not keep_open and self._close_file:
                self._close_file = False
                self._file.close()

    def __iter__(self):
        return self
##
# Parses an XML document from a string constant.  This function can
# be used to embed "XML literals" in Python code.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element

def XML(text, parser=None):
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()

##
# Parses an XML document from a string constant, and also returns
# a dictionary which maps from element id:s to elements.  Elements
# without an "id" attribute, or with an empty one, are left out of
# the dictionary.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)

def XMLID(text, parser=None):
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {}
    for elem in tree.iter():
        elem_id = elem.get("id")
        if elem_id:
            ids[elem_id] = elem
    return tree, ids

##
# Parses an XML document from a string constant.  Same as {@link #XML}.
#
# @def fromstring(text, parser=None)
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element

fromstring = XML

##
# Parses an XML document from a sequence of string fragments.
#
# @param sequence A list or other sequence containing XML data fragments.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element
# @since 1.3

def fromstringlist(sequence, parser=None):
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    for text in sequence:
        parser.feed(text)
    return parser.close()

# --------------------------------------------------------------------

##
# Generic element structure builder.  This builder converts a sequence
# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
# #TreeBuilder.end} method calls to a well-formed element structure.
# <p>
# You can use this class to build an element structure using a custom XML
# parser, or a parser for some other XML-like format.
#
# @param element_factory Optional element factory.  This factory
#    is called to create new Element instances, as necessary.

class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        self._tail = None # true if we're after an end tag
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element
    # @exception AssertionError If elements are still open, or if no
    #     element was ever started.

    def close(self):
        # raised explicitly (not via "assert") so the checks on the
        # caller's call sequence survive running under -O
        if self._elem:
            raise AssertionError("missing end tags")
        if self._last is None:
            raise AssertionError("missing toplevel element")
        return self._last

    # Attaches the collected character data to the last element: as its
    # tail if that element has already been closed, as its text otherwise.
    # Data collected before any element was started is discarded.

    def _flush(self):
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            # nest the new element inside the innermost open element
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = False
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element
    # @exception AssertionError If the tag does not match the innermost
    #     open element.

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        if self._last.tag != tag:
            raise AssertionError(
                "end tag mismatch (expected %s, got %s)" % (
                    self._last.tag, tag))
        self._tail = True
        return self._last

##
# Element structure builder for XML source data, based on the
# <b>expat</b> parser.
#
# @keyparam target Target object.  If omitted, the builder uses an
#     instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities.  This flag is not supported
#     by the current implementation.
# @keyparam encoding Optional encoding.  If given, the value overrides
#     the encoding specified in the XML file.
# @see #ElementTree
# @see #TreeBuilder

class XMLParser(object):

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace URI and local name in the names expat
        # reports; _fixname turns that into the "{uri}local" notation
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    # Re-raises an expat error as a ParseError carrying the same code and
    # (line, column) position.

    def _raiseerror(self, value):
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    # Start handler used when expat reports attributes as a dictionary.

    def _start(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    # Start handler used when expat reports attributes as a flat
    # [name, value, name, value, ...] list (ordered_attributes).

    def _start_list(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    # Comments are forwarded only if the target has a comment() method.

    def _comment(self, data):
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    # Processing instructions are forwarded only if the target has a
    # pi() method.

    def _pi(self, target, data):
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    # Default handler: resolves entity references through self.entity and
    # collects the pieces of a doctype declaration.

    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # use the error class stored by __init__, so this raises
                # the same class feed() and close() catch, whichever
                # expat module was importable
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1] # strip the quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # doctype() was redefined in a subclass.  Bound methods
                    # must be compared with !=, not "is not": every
                    # attribute access creates a new method object, so an
                    # identity test is true even without an override.
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree

# compatibility
XMLTreeBuilder = XMLParser

# workaround circular import.
try:
    from ElementC14N import _serialize_c14n
except ImportError:
    # the module could not be imported: leave the "c14n" method unregistered
    pass
else:
    # make the serializer available under the "c14n" method name
    _serialize["c14n"] = _serialize_c14n