1# xml.etree test.  This file contains enough tests to make sure that
2# all included components work as they should.
3# Large parts are extracted from the upstream test suite.
4
5# IMPORTANT: the same doctests are run from "test_xml_etree_c" in
6# order to ensure consistency between the C implementation and the
7# Python implementation.
8#
9# For this purpose, the module-level "ET" symbol is temporarily
10# monkey-patched when running the "test_xml_etree_c" test suite.
11# Don't re-import "xml.etree.ElementTree" module in the docstring,
12# except if the test is specific to the Python implementation.
13
14import sys
15import cgi
16
17from test import test_support
18from test.test_support import findfile
19
20from xml.etree import ElementTree as ET
21
# Paths of the XML fixture files, located through findfile() in the
# "xmltestdata" subdirectory.  The doctests below show that simple-ns.xml
# holds the same structure as simple.xml, placed in the namespace "namespace".
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# Small document: two <tag> children followed by a <section> that contains
# one more <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Larger <section>; find() swaps it in for the <section> of SAMPLE_XML.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same shape as SAMPLE_XML, without attributes, with a default namespace.
# Unlike the two literals above, this one starts with a newline (there is no
# backslash after the opening quotes).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""
54
55
def sanity():
    """
    Import sanity.

    Checks that the ElementTree, ElementInclude and ElementPath submodules
    of xml.etree can each be imported.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
64
def check_method(method):
    """Report on stdout when *method* is not callable.

    Nothing is printed and nothing is returned for a callable, so a doctest
    line that calls this helper expects no output.
    """
    if hasattr(method, '__call__'):
        return
    # A single parenthesised expression prints exactly what the Python 2
    # statement "print method, 'not callable'" printed, and is also valid
    # Python 3 syntax.
    print("%s not callable" % (method,))
68
def serialize(elem, to_string=True, **options):
    """Write *elem* into an in-memory buffer via ET.ElementTree.write().

    Extra keyword arguments are forwarded unchanged to write().  Returns the
    buffer contents when *to_string* is true; otherwise returns the buffer
    object itself, rewound to position 0 so it can be read back.
    """
    import StringIO
    buf = StringIO.StringIO()
    ET.ElementTree(elem).write(buf, **options)
    if not to_string:
        buf.seek(0)
        return buf
    return buf.getvalue()
79
def summarize(elem):
    """Return a short label for *elem*: "<Comment>" for comments, else its tag."""
    is_comment = elem.tag == ET.Comment
    return "<Comment>" if is_comment else elem.tag
84
def summarize_list(seq):
    """Return a list holding the summarize() label of each item in *seq*."""
    labels = []
    for item in seq:
        labels.append(summarize(item))
    return labels
87
def normalize_crlf(tree):
    """Convert CR LF line endings to LF in the character data of *tree*.

    Walks tree.iter() and rewrites, in place, every non-empty ``text`` and
    ``tail`` attribute.  Returns None.
    """
    for node in tree.iter():
        for field in ("text", "tail"):
            value = getattr(node, field)
            if value:
                setattr(node, field, value.replace("\r\n", "\n"))
94
def check_string(string):
    """Probe *string* for string-like behaviour, reporting oddities on stdout.

    The argument must support len(), iteration, concatenation with a str and
    slicing; if one of these operations fails, its exception propagates.  One
    line is printed for every item yielded by iteration whose length is not 1.
    Returns None.
    """
    len(string)
    for char in string:
        if len(char) != 1:
            # The 1-tuple keeps an item that is itself a tuple from being
            # unpacked as several format arguments.
            print("expected one-character string, got %r" % (char,))
    # Results are discarded on purpose: only the fact that these operations
    # succeed is being checked.
    string + ""
    string + " "
    string[:0]
103
def check_mapping(mapping):
    """Probe *mapping* for dict-like behaviour, reporting oddities on stdout.

    Exercises len(), keys(), items() and item lookup for every key, then
    stores "value" under "key" and reads it back, printing a line if the
    value read back differs.  Note the side effect: *mapping* keeps the
    "key" entry afterwards.  Returns None.
    """
    len(mapping)
    keys = mapping.keys()
    # Results of items() and of the lookups are discarded on purpose: only
    # the fact that the operations succeed is being checked.
    mapping.items()
    for key in keys:
        mapping[key]
    mapping["key"] = "value"
    stored = mapping["key"]
    if stored != "value":
        # The 1-tuple keeps a tuple value from being unpacked as several
        # format arguments.
        print("expected value string, got %r" % (stored,))
113
def check_element(element):
    """Recursively check that *element* offers the basic Element interface.

    Prints a line on stdout when ET.iselement() rejects the object or when
    one of the ``tag``, ``attrib``, ``text`` and ``tail`` members is missing.
    It then runs check_string() on the tag and on any non-None text/tail,
    check_mapping() on attrib, and finally recurses into every child.

    Note: check_mapping() stores a "key" entry in the mapping it is given,
    so every checked element ends up with that attribute set.
    """
    if not ET.iselement(element):
        print("not an element")
    for member in ("tag", "attrib", "text", "tail"):
        if not hasattr(element, member):
            print("no %s member" % member)

    check_string(element.tag)
    check_mapping(element.attrib)
    # text and tail may legitimately be None; only check real strings.
    for member in ("text", "tail"):
        value = getattr(element, member)
        if value is not None:
            check_string(value)
    for child in element:
        check_element(child)
134
135# --------------------------------------------------------------------
136# element tree tests
137
def interface():
    r"""
    Test element tree interface.

    Covers check_element() on a fresh Element and on the root of an
    ElementTree, repr() of an element whose tag contains a non-ASCII byte,
    the presence of the standard Element methods, the iterator-returning
    methods, and the module-level aliases.  A check_method() call prints
    nothing when the object is callable, hence the lack of expected output.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
188
def simpleops():
    """
    Basic method sanity checks.

    Exercises append(), remove(), insert() and extend() and checks the
    serialized result after each step.  The second part (steps numbered
    1-6 in trailing comments) adds the same subelement twice, removes it
    twice, and expects a ValueError on a third remove(); it then covers
    slice assignment, slice comparison and slice deletion on the children.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
244
def cdata():
    """
    Test CDATA handling (etc).

    The same text given literally, as numeric character references, and
    inside a CDATA section must serialize to identical plain text.

    >>> serialize(ET.XML("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'
    """
256
257# Only with Python implementation
def simplefind():
    """
    Test find methods using the elementpath fallback.

    This doctest imports xml.etree.ElementTree directly instead of using
    the module-level ET symbol.  It saves ElementTree.ElementPath, replaces
    it with ElementTree._SimpleElementPath() for the duration of the checks,
    and puts the saved object back in the last example.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
292
def find():
    """
    Test find methods (including xpath syntax).

    Runs find(), findtext() and findall() on both Element and ElementTree
    objects built from SAMPLE_XML.  Part-way through, the third child of the
    root is replaced by the tree parsed from SAMPLE_SECTION, so the later
    expectations refer to that larger section.  The last examples repeat
    findall() on the namespaced SAMPLE_XML_NS document.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
414
def file_init():
    """
    Test building an ElementTree through the ``file`` argument.

    The argument is given once as a StringIO object wrapping SAMPLE_XML and
    once as the SIMPLE_XMLFILE path; find() is then used on each tree.

    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
432
def bad_find():
    """
    Check bad or unsupported path expressions.

    An absolute path ("/tag") passed to findall() on an Element, as opposed
    to an ElementTree, must raise SyntaxError.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
442
def path_cache():
    """
    Check that the path cache behaves sanely.

    Looks at len(ET.ElementPath._cache): repeating the same ten lookups must
    leave it unchanged, looking up additional paths must grow it, and after
    600 distinct paths it must still be below 500 entries.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
460
def copy():
    """
    Test copy handling (etc).

    After the child of the original is renamed, the copy.copy() result shows
    the new name (the child is shared) while the copy.deepcopy() result
    still shows the old one.

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
478
def attrib():
    """
    Test attribute handling.

    The trailing comments (1.1, 1.2, ...) number the individual checks:
    group 1 uses get()/set() on an element without attributes, group 2
    passes an attribute as a keyword, group 3 passes a dict, group 4 passes
    the dict as keyword arguments, and group 5 passes both a dict and a
    keyword for the same key (the keyword value is the one kept).  Groups 3
    and 4 clear the source dict afterwards to detect aliasing.  The final
    part checks keys(), items() and direct mutation of ``attrib``.

    >>> elem = ET.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ET.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", **attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 5.1
    'value'
    >>> elem.attrib # 5.2
    {'key': 'value'}

    >>> elem = ET.Element('test')
    >>> elem.text = "aa"
    >>> elem.set('testa', 'testval')
    >>> elem.set('testb', 'test2')
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test2">aa</test>'
    >>> sorted(elem.keys())
    ['testa', 'testb']
    >>> sorted(elem.items())
    [('testa', 'testval'), ('testb', 'test2')]
    >>> elem.attrib['testb']
    'test2'
    >>> elem.attrib['testb'] = 'test1'
    >>> elem.attrib['testc'] = 'test2'
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test1" testc="test2">aa</test>'
    """
536
def makeelement():
    """
    Test makeelement handling.

    Checks that the element returned by makeelement() does not alias the
    attribute dict it was given (a line would be printed if it did), then
    exercises append(), clear(), extend() and whole-slice assignment from
    both a list and a tuple.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print "attrib aliasing"
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
567
def parsefile():
    """
    Test parsing from file.

    Parses both fixture files with ET.parse() and writes them back to
    stdout after normalize_crlf().  The content of SIMPLE_XMLFILE is then
    fed to ET.XMLParser, to ET.XMLTreeBuilder and to an XMLParser with an
    explicit ET.TreeBuilder target, each of which must produce the same
    serialized tree.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version  # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
622
def parseliteral():
    """
    Test parsing from string literals.

    Covers ET.XML, ET.fromstring and ET.fromstringlist as parsers,
    ET.tostring and ET.tostringlist as serializers (including an explicit
    "ascii" encoding, which adds an XML declaration), and the id mapping
    returned by ET.XMLID.

    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
648
def iterparse():
    """
    Test iterparse interface.

    In order: the default events (only "end") on both fixture files, an
    empty events tuple passed positionally and by keyword (no output),
    "start"/"end" events, namespace events, an unknown event name (raises
    ValueError), "start-ns" events on an iso-8859-1 document with non-ASCII
    namespace data, and a ParseError raised part-way through iteration.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print action, elem.tag
    end element
    >>> for action, elem in context:
    ...   print action, elem.tag
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...   print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...   if action in ("start", "end"):
    ...     print action, elem.tag
    ...   else:
    ...     print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> import StringIO

    >>> source = StringIO.StringIO(
    ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     "      xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print action, elem
    start-ns ('', u'http://\\xe9ffbot.org/ns')
    start-ns (u'cl\\xe9', 'http://effbot.org/ns')

    >>> source = StringIO.StringIO("<document />junk")
    >>> try:
    ...   for action, elem in iterparse(source):
    ...     print action, elem.tag
    ... except ET.ParseError, v:
    ...   print v
    end document
    junk after document element: line 1, column 12
    """
744
def writefile():
    """
    Test serialization of a hand-built tree through ElementTree.write().

    Also checks that setting an element's tag to None suppresses the tag in
    the output while its text and children are still written, including a
    comment and a processing instruction inserted as children.

    >>> elem = ET.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ET.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    >>> elem.insert(0, ET.Comment("comment"))
    >>> serialize(elem)     # assumes 1.3
    'text<!--comment--><subtag>subtext</subtag>'
    >>> elem[0] = ET.PI("key", "value")
    >>> serialize(elem)
    'text<?key value?><subtag>subtext</subtag>'
    """
766
def custom_builder():
    """
    Test parser w. custom builder.

    The first Builder implements only start(), end() and data() and is fed
    SIMPLE_XMLFILE.  The second one adds pi() and comment() and is fed
    SIMPLE_NS_XMLFILE, whose processing instruction and comment are then
    reported ahead of the element events.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
820
def getchildren():
    """
    Test Element.getchildren()

    Lists the children of every element reached through iter() and through
    getiterator() on the parsed fixture file, then checks on SAMPLE_XML
    that getchildren() stays consistent with indexing and slicing after
    slice deletion, slice assignment and clear().

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
867
def writestring():
    """
    Test ET.tostring() on elements parsed by ET.XML and by ET.fromstring.

    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    """
877
def check_encoding(encoding):
    """
    Parse a minimal document whose XML declaration names *encoding*.

    Nothing is returned; any exception raised by ET.XML propagates to the
    caller.

    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    """
    document = "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    ET.XML(document)
888
def encoding():
    r"""
    Test encoding issues.

    Serializes the same element with the default encoding and with explicit
    "utf-8", "us-ascii" and "iso-8859-1" encodings.  It does so for plain
    text, for markup characters in text and in an attribute value, and for
    non-ASCII characters in text and in an attribute value.  In the expected
    output only "iso-8859-1" gets an XML declaration.

    >>> elem = ET.Element("tag")
    >>> elem.text = u"abc"
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    """
947
def methods():
    r"""
    Test serialization methods.

    Serializes one element with the default method, with method=None and
    with the "xml", "html" and "text" methods.  The first three outputs are
    identical; "html" leaves <link> unclosed and the "<" inside <script>
    unescaped; "text" emits only the character data.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'
    """
965
def iterators():
    """
    Test iterators.

    Checks iter() and itertext() on the root and on a sub-element, that
    next() works directly on their results and on the result of
    ET.iterparse, and that iter() on an ElementTree built with None as its
    root raises AttributeError.

    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    >>> summarize_list(e.iter())
    ['html', 'body', 'i']
    >>> summarize_list(e.find("body").iter())
    ['body', 'i']
    >>> summarize(next(e.iter()))
    'html'
    >>> "".join(e.itertext())
    'this is a paragraph...'
    >>> "".join(e.find("body").itertext())
    'this is a paragraph.'
    >>> next(e.itertext())
    'this is a '

    Method iterparse should return an iterator. See bug 6472.

    >>> sourcefile = serialize(e, to_string=False)
    >>> next(ET.iterparse(sourcefile))  # doctest: +ELLIPSIS
    ('end', <Element 'i' at 0x...>)

    >>> tree = ET.ElementTree(None)
    >>> tree.iter()
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'iter'
    """
995
# Document whose internal DTD subset references an external parameter entity
# and whose body uses the general entity &entity;, which the document itself
# never defines.  Used by the entity() doctests.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
1003
def entity():
    """
    Test entity handling.

    Part 1 checks that a hexadecimal character reference is written back as
    a decimal one.  Part 2 checks the ParseError messages for an undefined
    entity, without and with a DOCTYPE.  Part 3 defines the entity through
    the parser's ``entity`` mapping, after which ENTITY_XML parses.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'
    """
1033
def error(xml):
    """
    Parse *xml* and return the ET.ParseError it raises.

    Helper for the examples below: the exception object itself is handed
    back so that its attributes (such as .position) can be inspected.
    Returns None when *xml* parses without error; exceptions of any other
    type propagate to the caller.

    Test error handling.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        # Bind the caught exception directly rather than reading the
        # deprecated, process-global sys.exc_value.
        return exc
1053
def namespace():
    """
    Test namespace issues.

    Checks the prefix chosen by the serializer: the built-in "xml" prefix,
    the "rdf" and "html" prefixes of well-known URIs, and a generated
    "ns0" prefix for URIs the serializer has no prefix for.

    1) xml namespace

    >>> elem = ET.XML("<tag xml:lang='en' />")
    >>> serialize(elem) # 1.1
    '<tag xml:lang="en" />'

    2) other "well-known" namespaces

    >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    >>> serialize(elem) # 2.1
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'

    >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    >>> serialize(elem) # 2.2
    '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'

    >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    >>> serialize(elem) # 2.3
    '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'

    3) unknown namespaces
    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> print serialize(elem)
    <ns0:body xmlns:ns0="http://effbot.org/ns">
      <ns0:tag>text</ns0:tag>
      <ns0:tag />
      <ns0:section>
        <ns0:tag>subtext</ns0:tag>
      </ns0:section>
    </ns0:body>
    """
1089
def qname():
    """
    Test QName handling.

    Tags, attribute names and attribute values are given both as
    "{uri}local" strings and as ET.QName objects, and the serialized
    prefixes are checked.  The last section checks str() and == on QName
    objects themselves.

    1) decorated tags

    >>> elem = ET.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("{uri}tag"))
    >>> serialize(elem) # 1.2
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> serialize(elem) # 1.3
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
    >>> serialize(elem) # 1.4
    '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'

    2) decorated attributes

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "value"
    >>> serialize(elem) # 2.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    >>> elem.clear()
    >>> elem.attrib[ET.QName("{uri}key")] = "value"
    >>> serialize(elem) # 2.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    3) decorated values are not converted by default, but the
       QName wrapper can be used for values

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "{uri}value"
    >>> serialize(elem) # 3.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
    >>> serialize(elem) # 3.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'

    >>> elem.clear()
    >>> subelem = ET.Element("tag")
    >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    >>> elem.append(subelem)
    >>> elem.append(subelem)
    >>> serialize(elem) # 3.3
    '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'

    4) Direct QName tests

    >>> str(ET.QName('ns', 'tag'))
    '{ns}tag'
    >>> str(ET.QName('{ns}tag'))
    '{ns}tag'
    >>> q1 = ET.QName('ns', 'tag')
    >>> q2 = ET.QName('ns', 'tag')
    >>> q1 == q2
    True
    >>> q2 = ET.QName('ns', 'other-tag')
    >>> q1 == q2
    False
    >>> q1 == 'ns:tag'
    False
    >>> q1 == '{ns}tag'
    True
    """
1162
def doctype_public():
    """
    Test PUBLIC doctype.

    The document only has to parse without raising; the example prints
    nothing.

    >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
    ...   ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ...   ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    ...   '<html>text</html>')

    """
1173
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Runs ElementPath.xpath_tokenizer() over the path expression *p* and
    returns a flat list with one string per (op, tag) pair: the operator
    when it is non-empty, otherwise the tag.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    # Each token is an (op, tag) pair; keep whichever half is non-empty.
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
1227
def processinginstruction():
    """
    Test ProcessingInstruction directly

    Both the ProcessingInstruction factory and its PI alias are
    serialized with ET.tostring().

    >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
    '<?test instruction?>'
    >>> ET.tostring(ET.PI('test', 'instruction'))
    '<?test instruction?>'

    Issue #2746

    The text of a processing instruction is written out without escaping
    the "<" and "&" characters.

    >>> ET.tostring(ET.PI('test', '<testing&>'))
    '<?test <testing&>?>'
    >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
    """
1244
1245#
1246# xinclude tests (samples from appendix C of the xinclude specification)
1247
# In-memory resource table for the XInclude tests: maps an href to the raw
# text of that resource.  xinclude_loader() answers lookups from this dict.
XINCLUDE = {}

# C.1: XML inclusion of disclaimer.xml.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
"""

# Resource included by C1.xml.
XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
"""

# C.2: text inclusion (parse="text") of count.txt.
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Plain-text resource included by C2.xml and C2b.xml.
XINCLUDE["count.txt"] = "324387"

# Variant of C2.xml in which the text include follows a sibling element
# (<em>) instead of being the first child of <p>.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: an XML resource (data.xml) included as text.
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

# Resource included by C3.xml.
XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested includes with xi:fallback children.  Neither example.txt nor
# fallback-example.txt has an entry in this table.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

# The href is the path stored in SIMPLE_XMLFILE.  cgi.escape(..., True) also
# escapes double quotes, so the path can sit inside the quoted attribute.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
1320
def xinclude_loader(href, parse="xml", encoding=None):
    """Serve XInclude resources from the in-memory XINCLUDE table.

    Returns the stored text for *href*; when *parse* is "xml" the text is
    first parsed with xml.etree.ElementTree.XML and the resulting element
    is returned instead.  *encoding* is accepted but not used.  Raises
    IOError("resource not found") when *href* has no entry in the table.
    """
    try:
        payload = XINCLUDE[href]
    except KeyError:
        raise IOError("resource not found")
    if parse != "xml":
        # Any other parse mode gets the raw text back.
        return payload
    from xml.etree.ElementTree import XML
    return XML(payload)
1330
def xinclude():
    r"""
    Expand the XInclude sample documents stored in XINCLUDE, using
    xinclude_loader as the loader, and compare the serialized results.

    Basic inclusion example (XInclude C.1)

    >>> from xml.etree import ElementTree as ET
    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("C1.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C1
    <document>
      <p>120 Mz is adequate for an average home user.</p>
      <disclaimer>
      <p>The opinions represented herein represent those of the individual
      and should not be interpreted as official policy endorsed by this
      organization.</p>
    </disclaimer>
    </document>

    Textual inclusion example (XInclude C.2)

    >>> document = xinclude_loader("C2.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C2
    <document>
      <p>This document has been accessed
      324387 times.</p>
    </document>

    Textual inclusion after sibling element (based on modified XInclude C.2)

    >>> document = xinclude_loader("C2b.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print(serialize(document)) # C2b
    <document>
      <p>This document has been <em>accessed</em>
      324387 times.</p>
    </document>

    Textual inclusion of XML example (XInclude C.3)

    >>> document = xinclude_loader("C3.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C3
    <document>
      <p>The following is the source of the "data.xml" resource:</p>
      <example>&lt;?xml version='1.0'?&gt;
    &lt;data&gt;
      &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
    &lt;/data&gt;
    </example>
    </document>

    Fallback example (XInclude C.5)
    Note! Fallback support is not yet implemented

    The include therefore surfaces the IOError that xinclude_loader
    raises for a resource missing from XINCLUDE.

    >>> document = xinclude_loader("C5.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    Traceback (most recent call last):
    IOError: resource not found
    >>> # print serialize(document) # C5
    """
1393
def xinclude_default():
    """
    Call ElementInclude.include() without a loader argument.

    The href in XINCLUDE["default.xml"] is the path held in
    SIMPLE_XMLFILE, so the include is resolved by ElementInclude itself
    rather than by xinclude_loader.

    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("default.xml")
    >>> ElementInclude.include(document)
    >>> print serialize(document) # default
    <document>
      <p>Example.</p>
      <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    </document>
    """
1410
1411#
1412# badly formatted xi:include tags
1413
# Malformed XInclude documents, keyed by name; parsed by xinclude_failures().
XINCLUDE_BAD = {}

# B1: xi:include whose parse attribute is neither "xml" nor "text".
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# B2: xi:fallback that is not the child of an xi:include element.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""
1430
def xinclude_failures():
    r"""
    Check the FatalIncludeError messages raised by ElementInclude.include().

    Every case uses a loader that returns None for any resource.

    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """
1466
1467# --------------------------------------------------------------------
1468# reported bugs
1469
def bug_xmltoolkit21():
    """

    marshaller gives obscure errors for non-string values

    The int 123 is used in turn as tag, text, tail, attribute key and
    attribute value; each time serialization must fail with the same
    explicit TypeError.

    >>> elem = ET.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)

    """
1501
def bug_xmltoolkit25():
    """

    typo in ElementTree.findtext

    ElementTree.findtext() must return the text of a direct child and of
    a nested match in SAMPLE_XML.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'

    """
1515
def bug_xmltoolkit28():
    """

    .//tag causes exceptions

    findall() with a ".//" path must return an empty list for a tag that
    is absent and the matching element for one that is present.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']

    """
1528
def bug_xmltoolkitX1():
    """

    dump() doesn't flush the output buffer

    Text written to sys.stdout right after ET.dump() must appear after
    the dumped XML.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail

    """
1540
def bug_xmltoolkit39():
    """

    non-ascii element and attribute names don't work

    Names are taken both from parsed iso-8859-1 input and from unicode
    strings passed to the API; in every case the tree is serialized to
    UTF-8 and the encoded bytes are checked.

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """
1570
def bug_xmltoolkit54():
    """

    problems handling internally defined entities

    An entity declared in the document's own DTD subset must be expanded
    when it is referenced in the body.

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'

    """
1581
def bug_xmltoolkit55():
    """

    make sure we're reporting the first error, not the last

    The document references three undefined entities; the ParseError must
    name the first one (&ldots;).

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36

    """
1592
class ExceptionFile:
    """File-like stand-in whose read() always fails."""

    def read(self, x):
        """Raise IOError, whatever size *x* is requested."""
        raise IOError()
1596
def xmltoolkit60():
    """

    Handle crash in stream source.
    ET.parse() must let the IOError raised by the source's read() method
    propagate to the caller.
    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError

    """
1606
# Input for xmltoolkit62(): the text references &lsquo; and &rsquo;, which
# are not declared in the document itself (the DOCTYPE names an external DTD).
XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""
1614
1615
def xmltoolkit62():
    """

    Don't crash when using custom entities.

    Feeds XMLTOOLKIT62_DOC to a parser whose entity table has been
    extended with lsquo and rsquo, and returns the text of the first
    paragraph element found in the result.

    >>> xmltoolkit62()
    u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    custom_entities = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
    builder = ET.XMLTreeBuilder()
    # Make the two quote entities known before any input is fed.
    builder.entity.update(custom_entities)
    builder.feed(XMLTOOLKIT62_DOC)
    root = builder.close()
    paragraph = root.find('.//paragraph')
    return paragraph.text
1631
def xmltoolkit63():
    """

    Check reference leak.

    The body builds one element holding text through a fresh TreeBuilder
    and returns nothing; the examples call it repeatedly and compare the
    reference count of None before and after.

    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    builder = ET.TreeBuilder()
    # Return values are deliberately discarded.
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
1648
1649# --------------------------------------------------------------------
1650
1651
def bug_200708_newline():
    r"""

    Preserve newlines in attributes.

    A newline in an attribute value is written as the character reference
    &#10; and comes back unchanged after a parse/serialize round trip.

    >>> e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n  return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'

    """
1666
def bug_200708_close():
    """

    XMLParser.close() must hand back a root element, both with the
    default target and with a custom target object.

    Test default builder.
    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.
    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
1686
def bug_200709_default_namespace():
    """

    Serialize with the default_namespace option.

    Names in the default namespace are written without a prefix (1),
    names in another namespace still get a generated prefix (2), and a
    name without any namespace is rejected with ValueError (3).

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
1709
def bug_200709_register_namespace():
    """

    After ET.register_namespace() the registered prefix is used for the
    URI in place of the generated "ns0".

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """
1725
def bug_200709_element_comment():
    """

    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    A node created with ET.Comment or ET.PI and appended to an element
    must compare equal to that same factory through its tag.

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True

    """
1743
def bug_200709_element_insert():
    """

    Element.insert() with index 0 puts the new child first; with index -1
    it puts the child before the last existing one.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
1759
def bug_200709_iter_comment():
    """

    Element.iter(ET.Comment) must find a comment that sits below a child
    element, not only directly below the element iterated.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']

    """
1771
1772# --------------------------------------------------------------------
1773# reported on bugs.python.org
1774
def bug_1534630():
    """

    Character data passed to TreeBuilder.data() before the first start()
    call must be accepted and must not show up in the resulting tree.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'

    """
1787
def check_issue6233():
    """

    Non-ASCII text parsed from utf-8 or iso-8859-1 input must be written
    as a numeric character reference when serializing to ascii.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"

    """
1799
def check_issue3151():
    """

    A namespace URI that itself contains braces ("${stuff}") must survive
    parsing into the tag and serialization back to XML.

    >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    >>> e.tag
    '{${stuff}}localname'
    >>> t = ET.ElementTree(e)
    >>> ET.tostring(e)
    '<ns0:localname xmlns:ns0="${stuff}" />'

    """
1811
def check_issue6565():
    """

    Slice assignment on an element: elem[:] = newelem[:] replaces all
    children of elem with the children of newelem.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
1824
def check_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    # NOTE(review): the body consists of the docstring only, so the "self"
    # parameter is never used.
    """

    With method='html', the HTML 4.01 empty elements must be serialized
    as a bare start tag, in upper and lower case, whether the source
    wrote them as <x /> or as <x></x>.

    >>> empty_elems = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
    ...                'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']
    >>> elems = ''.join('<%s />' % elem for elem in empty_elems)
    >>> serialize(ET.XML('<html>%s</html>' % elems), method='html')
    '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>'
    >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html')
    '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>'
    >>> elems = ''.join('<%s></%s>' % (elem, elem) for elem in empty_elems)
    >>> serialize(ET.XML('<html>%s</html>' % elems), method='html')
    '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>'
    >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html')
    '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>'

    """
1844
1845# --------------------------------------------------------------------
1846
1847
class CleanContext(object):
    """Run a block against private copies of ElementTree's global state.

    On entry the namespace map and the ElementPath cache are replaced by
    copies; on exit the original objects are put back.  The context also
    wraps a test_support.check_warnings() manager for the warning
    messages listed in __init__.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        # under -OO, doctests cannot be run and therefore not all warnings
        # will be emitted, so force quiet mode in that case
        if sys.flags.optimize >= 2:
            quiet = True
        removal_notice = ("This method will be removed in future versions.  "
                          "Use .+ instead.")
        expected = [
            # Search behaviour is broken if search path starts with "/".
            ("This search is broken in 1.3 and earlier, and will be fixed "
             "in a future version.  If you rely on the current behaviour, "
             "change it to '.+'", FutureWarning),
            # Element.getchildren() and Element.getiterator() are deprecated.
            (removal_notice, DeprecationWarning),
            (removal_notice, PendingDeprecationWarning),
            # XMLParser.doctype() is deprecated.
            ("This method of XMLParser is deprecated.  Define doctype.. "
             "method on the TreeBuilder target.", DeprecationWarning),
        ]
        self.checkwarnings = test_support.check_warnings(*expected,
                                                         quiet=quiet)

    def __enter__(self):
        from xml.etree import ElementTree
        path_module = ElementTree.ElementPath
        # Keep the live objects so that __exit__ can put them back
        self._nsmap = ElementTree._namespace_map
        self._path_cache = path_module._cache
        # Install copies of the namespace mapping and of the path cache
        # (the latter should be empty)
        ElementTree._namespace_map = self._nsmap.copy()
        path_module._cache = self._path_cache.copy()
        self.checkwarnings.__enter__()

    def __exit__(self, *exc_details):
        from xml.etree import ElementTree
        # Put the saved mapping and path cache back before the warnings
        # manager gets its chance to complain
        ElementTree._namespace_map = self._nsmap
        ElementTree.ElementPath._cache = self._path_cache
        self.checkwarnings.__exit__(*exc_details)
1889
1890
def test_main(module_name='xml.etree.ElementTree'):
    """Run this module's doctests for the implementation *module_name*.

    The module-level ET symbol of test.test_xml_etree must already refer
    to the module called *module_name*; this is asserted before and after
    the run.  The doctests execute inside a CleanContext, which is put in
    quiet mode for any module other than 'xml.etree.ElementTree'.
    """
    from test import test_xml_etree

    # The same doctests are used for both the Python and the C implementations
    assert test_xml_etree.ET.__name__ == module_name

    # XXX the C module should give the same warnings as the Python module
    tolerate_missing_warnings = (module_name != 'xml.etree.ElementTree')
    with CleanContext(quiet=tolerate_missing_warnings):
        test_support.run_doctest(test_xml_etree, verbosity=True)

    # The module should not be changed by the tests
    assert test_xml_etree.ET.__name__ == module_name
1905
# Run directly: test_main() is called with its default module name,
# 'xml.etree.ElementTree'.
if __name__ == '__main__':
    test_main()
1908