1"""\
2minidom.py -- a lightweight DOM implementation.
3
4parse("foo.xml")
5
6parseString("<foo><bar/></foo>")
7
8Todo:
9=====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13        interface
14 * SAX 2 namespaces
15"""
16
17import xml.dom
18
19from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
20from xml.dom.minicompat import *
21from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
22
# Consulted by the ID-cache invalidation checks.  The tuple is
# deliberately incomplete: the node being tested is always the one being
# added or removed (never the node being modified), so it can never be a
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.
#
_nodeTypes_with_children = (
    xml.dom.Node.ELEMENT_NODE,
    xml.dom.Node.ENTITY_REFERENCE_NODE,
)
30
31
class Node(xml.dom.Node):
    """Common base class for the nodes of a minidom tree.

    Implements the child-list operations (insertBefore, appendChild,
    replaceChild, removeChild, normalize), serialization helpers built
    on the subclass-provided writexml(), and the DOM Level 3 user-data
    API.  Subclasses that accept children are expected to supply
    ``childNodes`` and ``_child_node_types``; neither is defined here.
    """

    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __nonzero__(self):
        """A node is always true, whatever its content."""
        return True

    # Python 3 looks up __bool__ instead of __nonzero__.  Without the
    # alias, a subclass that defines __len__ would be falsy when empty,
    # which breaks the "if node.nextSibling:" style tests used below.
    __bool__ = __nonzero__

    def toxml(self, encoding = None):
        """Return the node serialized without indentation or newlines."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return the node serialized through writexml().

        indent   -- the indentation string to prepend, per level
        newl     -- the newline string to append
        encoding -- when not None, the output buffer is wrapped in that
                    codec's stream writer
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if the node has at least one child."""
        if self.childNodes:
            return True
        else:
            return False

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Falls through to an implicit None when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Falls through to an implicit None when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild immediately before refChild and return newChild.

        A document fragment has each of its children inserted in turn and
        the fragment itself is returned.  A refChild of None appends.
        Inserting a node before itself leaves the tree untouched.

        Raises HierarchyRequestErr if newChild's type is not allowed
        here, and NotFoundErr if refChild is not a child of this node.
        In both cases newChild is left attached to its previous parent.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if refChild is None:
            # appendChild() takes care of detaching newChild itself.
            self.appendChild(newChild)
            return newChild
        # Validate refChild *before* touching newChild, so that a failed
        # call does not silently remove newChild from its old parent.
        try:
            self.childNodes.index(refChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if newChild is refChild:
            # Already sitting in the requested position; detaching it
            # first would make refChild disappear from the child list.
            return newChild
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Looked up only now: if newChild was one of our own children,
        # removing it shifted the positions that follow it.
        index = self.childNodes.index(refChild)
        if newChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        self.childNodes.insert(index, newChild)
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if index:
            node = self.childNodes[index-1]
            node.nextSibling = newChild
            newChild.previousSibling = node
        else:
            newChild.previousSibling = None
        newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add node as the last child and return it.

        A document fragment has each of its children appended in turn
        and the fragment itself is returned.  Raises HierarchyRequestErr
        if node's type is not allowed as a child of this node.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's place and return oldChild.

        Replacing a node with itself is a no-op that returns None.  When
        newChild is a document fragment, oldChild is removed, the
        fragment's children take its place, and the result of
        insertBefore() (the fragment) is returned.

        Raises HierarchyRequestErr if newChild's type is not allowed
        here, and NotFoundErr if oldChild is not a child of this node;
        newChild stays attached to its previous parent in both cases.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Validate oldChild before detaching newChild so a failed call
        # leaves both trees exactly as they were.
        try:
            self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Recomputed after the detach: removing newChild from this same
        # parent may have shifted oldChild's position.
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr if oldChild is not one of this node's children.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Splice the neighbours together around the removed node.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Child elements are normalized recursively.  Discarded text nodes
        are unlink()ed.
        """
        # L collects the children that survive, in order.
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy made by _clone_node(), owned by this node's
        document (or by the node itself when it has no owner)."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about a feature."""
        # NOTE(review): raises AttributeError when ownerDocument is None.
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if the feature is supported, otherwise None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing data=None removes an existing entry; the handler is
        ignored in that case.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        # Notify every registered handler; entries stored with a None
        # handler are skipped.
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Recursively clear parent, owner, sibling and child references
        of this subtree."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
272
273
def _append_child(self, node):
    """Link node in as the last child of self.

    Fast path with fewer checks (no type validation, no detaching from
    a previous parent, no ID-cache handling); usable by DOM builders if
    careful.  Links are written straight into the instance dictionaries.
    """
    siblings = self.childNodes
    if siblings:
        tail = siblings[-1]
        node.__dict__["previousSibling"] = tail
        tail.__dict__["nextSibling"] = node
    siblings.append(node)
    node.__dict__["parentNode"] = self
283
def _in_document(node):
    """Return True iff node is part of a document tree.

    Walks the parentNode chain upwards, starting with node itself, and
    reports whether a DOCUMENT_NODE is met before the chain ends.
    """
    current = node
    while current is not None:
        if current.nodeType == Node.DOCUMENT_NODE:
            return True
        current = current.parentNode
    return False
291
def _write_data(writer, data):
    "Writes datachars to writer."
    # Nothing is written for empty or None data.
    if not data:
        return
    # "&" must be handled first so the ampersands introduced by the
    # later replacements are not escaped a second time.
    for raw, escaped in (("&", "&amp;"),
                         ("<", "&lt;"),
                         ("\"", "&quot;"),
                         (">", "&gt;")):
        data = data.replace(raw, escaped)
    writer.write(data)
298
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect into rc, in document order, every descendant element of
    parent whose tagName equals name ("*" matches any element).

    Every child is descended into, whatever its node type.  Returns rc.
    """
    match_any = name == "*"
    for child in parent.childNodes:
        is_element = child.nodeType == Node.ELEMENT_NODE
        if is_element and (match_any or child.tagName == name):
            rc.append(child)
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
306
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect into rc, in document order, every descendant element of
    parent that matches both localName and nsURI ("*" is a wildcard for
    either).

    Only element children are descended into.  Returns rc.
    """
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        name_ok = localName == "*" or child.localName == localName
        if name_ok and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
315
class DocumentFragment(Node):
    """Parentless container node for a list of children.

    The Node child-insertion methods treat a fragment specially: its
    children are moved into the target one by one rather than the
    fragment itself being inserted.
    """

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own empty child list.
        self.childNodes = NodeList()
332
333
class Attr(Node):
    """An attribute node.

    The value is kept both on the node itself (``value``/``nodeValue``)
    and in a single text child.  ``name`` and ``nodeName`` are aliases
    that are always written together by __setattr__.
    """

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an unattached attribute named qName.

        localName is accepted but not stored; the local name is derived
        from the qualified name by _get_localName().
        """
        # skip setattr for performance
        d = self.__dict__
        d["nodeName"] = d["name"] = qName
        d["namespaceURI"] = namespaceURI
        d["prefix"] = prefix
        d['childNodes'] = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # Everything after the first ":" of the qualified name, or the
        # whole name when there is no prefix.
        return self.nodeName.split(":", 1)[-1]

    def _get_name(self):
        return self.name

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        """Keep aliased attributes in step and invalidate the ID cache.

        "value"/"nodeValue" are written together and mirrored into the
        text child; "name"/"nodeName" are written together.  Either
        change clears the owner element's ID cache.  Any other attribute
        is stored as-is.
        """
        d = self.__dict__
        if name in ("value", "nodeValue"):
            d["value"] = d["nodeValue"] = value
            d2 = self.childNodes[0].__dict__
            d2["data"] = d2["nodeValue"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        elif name in ("name", "nodeName"):
            d["name"] = d["nodeName"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        else:
            d[name] = value

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild name/nodeName from it.

        Raises NamespaceErr when "xmlns" is used with a namespace other
        than XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        d = self.__dict__
        d['prefix'] = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        d['nodeName'] = d['name'] = newName

    def _set_value(self, value):
        # Same effect as assigning .value, but the text child is updated
        # through its own attribute assignment.
        d = self.__dict__
        d['value'] = d['nodeValue'] = value
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.childNodes[0].data = value

    def unlink(self):
        """Detach the attribute from its owner element and drop its
        children.

        The attribute is removed from both of the owner's indexes, its
        ID bookkeeping is undone, and ownerElement is reset to None so
        that the node can be attached to another element afterwards.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer knows this attribute, so the
            # attribute must stop claiming the element; a stale owner
            # makes Element.setAttributeNode() reject the node as being
            # in use elsewhere.
            self.__dict__['ownerElement'] = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        # True when explicitly flagged as an ID, or when the document's
        # element info for the owner element declares it to be one.
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        # The type reported by the document's element info for the owner
        # element, or _no_type when there is no document, owner or info.
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
454
455
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.

    Attributes can be addressed either by qualified name (a string) or
    by a (namespaceURI, localName) tuple.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs maps qualified names, attrsNS maps (namespaceURI,
        # localName) pairs; both are shared with the owner element.
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position index, or None if the index
        is out of range."""
        try:
            # list() so that indexing also works when keys() is a view.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def has_key(self, key):
        """Return True if key (a name or a namespace tuple) is present."""
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def __contains__(self, key):
        """Support ``key in attributes`` with has_key() semantics.

        Without this hook the ``in`` operator falls back to calling
        __getitem__ with 0, 1, ... which fails with KeyError here.
        """
        return self.has_key(key)

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        """Return the attribute node called name, or value if absent."""
        return self._attrs.get(name, value)

    __len__ = _get_length

    __hash__ = None # Mutable type can't be correctly hashed
    def __cmp__(self, other):
        # Two maps over the same underlying dictionary compare equal;
        # anything else is ordered by object identity.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            mine, theirs = id(self), id(other)
            return (mine > theirs) - (mine < theirs)

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or an Attr node.

        A string updates the attribute called attname, creating it if
        necessary.  An Attr node is stored under its own name.  Anything
        else raises TypeError.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute called name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute with the given namespace and local name,
        or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute with the given namespace and
        local name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store node under its name, replacing (and unlinking) any
        attribute already stored under that name, which is returned.

        Raises HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old is not None:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
609
610
class TypeInfo(object):
    """Plain record pairing a type name with the namespace it is from."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        # The namespace is mentioned only when it is non-empty.
        if not self.namespace:
            return "<TypeInfo %r>" % self.name
        return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

# Shared placeholder used wherever no type information is available.
_no_type = TypeInfo(None, None)
631
class Element(Node):
    """An element node with a child list and a set of attributes.

    Attributes are double-indexed: ``_attrs`` maps qualified names and
    ``_attrsNS`` maps (namespaceURI, localName) pairs to the same Attr
    nodes.
    """

    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an element called tagName with no children or
        attributes.

        localName is accepted but not stored; the local name is derived
        from tagName by _get_localName().
        """
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def _get_localName(self):
        # Everything after the first ":" of the tag name, or the whole
        # name when there is no prefix.
        return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink every attribute, drop both attribute indexes, then
        unlink the subtree as Node.unlink() does."""
        # Attr.unlink() deletes its own entries from self._attrs, so the
        # loop must run over a snapshot rather than a live view.
        for attr in list(self._attrs.values()):
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the value of the named attribute, or "" if absent."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute, or "" if
        absent."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Create the named attribute or update its value."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            # for performance
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create the namespaced attribute or update its value and, if
        the qualified name carries a different prefix, its name.

        On a prefix change the attribute is re-indexed under the new
        qualified name; another attribute already stored under that
        name is removed first.
        """
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            # for performance
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            d = attr.__dict__
            d["prefix"] = prefix
            d["nodeName"] = qualifiedName
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            d = attr.__dict__
            if value != attr.value:
                d["value"] = d["nodeValue"] = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                old_name = attr.nodeName
                d["prefix"] = prefix
                # name and nodeName are aliases and must change together.
                d["name"] = d["nodeName"] = qualifiedName
                # Keep the qualified-name index in step with the rename;
                # a stale key would make a later removal of this
                # attribute fail with KeyError.
                if self._attrs.get(old_name) is attr:
                    del self._attrs[old_name]
                clash = self._attrs.get(qualifiedName)
                if clash is not None and clash is not attr:
                    self.removeAttributeNode(clash)
                self._attrs[qualifiedName] = attr

    def getAttributeNode(self, attrname):
        """Return the named Attr node, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced Attr node, or None."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Attributes already stored under the same qualified name or the
        same (namespaceURI, localName) pair are removed first; the
        replaced node, if it is a different node, is returned.  Raises
        InuseAttributeErr if attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the named attribute; NotFoundErr if it is absent."""
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; NotFoundErr if it is
        absent."""
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Remove the given Attr node from this element and return it.

        Raises NotFoundErr if node is None or no attribute is stored
        under its name.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        # DOM specifies that the removed Attr node is the return value.
        return node

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return a NodeList of descendant elements with the given tag
        name ("*" matches all)."""
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a NodeList of descendant elements matching the given
        namespace and local name ("*" is a wildcard for either)."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes and its children to writer.

        Attributes are written in sorted name order.  An element without
        children is written as an empty-element tag.
        """
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        # sorted() works whether keys() returns a list or a view.
        a_names = sorted(attrs.keys())

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">%s"%(newl))
            for node in self.childNodes:
                node.writexml(writer,indent+addindent,addindent,newl)
            writer.write("%s</%s>%s" % (indent,self.tagName,newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        # A fresh NamedNodeMap view over the two attribute indexes.
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        if self._attrs:
            return True
        else:
            return False

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Flag idAttr as an ID attribute of this element.

        Raises NotFoundErr if idAttr is None or not owned by this
        element, and NoModificationAllowedErr if the element sits inside
        an entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr.__dict__['_is_id'] = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)

defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
850
851
def _set_attribute_node(element, attr):
    """Register attr in both of element's attribute indexes and make
    element its owner, after clearing the ID cache."""
    _clear_id_cache(element)
    by_name, by_ns = element._attrs, element._attrsNS
    by_name[attr.name] = attr
    by_ns[(attr.namespaceURI, attr.localName)] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr.__dict__['ownerElement'] = element
861
862
class Childless:
    """Mixin that makes childless-ness easy to implement and avoids
    the complexity of the Node methods that deal with children.

    Read access reports an empty child list; every attempt to add,
    replace or remove a child raises a DOM exception.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    # -- read access: there is never anything to report ---------------

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def hasChildNodes(self):
        return False

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        pass

    # -- mutation: always refused -------------------------------------

    def appendChild(self, node):
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)
901
902
class ProcessingInstruction(Childless, Node):
    """A processing instruction node, serialized as <?target data?>.

    'target' and 'nodeName' are aliases, as are 'data' and 'nodeValue':
    assigning to either name of a pair stores the value under both keys
    of the instance dictionary.
    """

    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        # Each assignment goes through __setattr__, which also fills in
        # the aliased attribute (nodeName / nodeValue).
        self.target = target
        self.data = data

    def __setattr__(self, name, value):
        state = self.__dict__
        if name in ("data", "nodeValue"):
            state['data'] = value
            state['nodeValue'] = value
        elif name in ("target", "nodeName"):
            state['target'] = value
            state['nodeName'] = value
        else:
            state[name] = value

    def _get_data(self):
        """Return the instruction's data string."""
        return self.data

    def _set_data(self, value):
        """Store `value` as both data and nodeValue."""
        state = self.__dict__
        state['data'] = value
        state['nodeValue'] = value

    def _get_target(self):
        """Return the instruction's target."""
        return self.target

    def _set_target(self, value):
        """Store `value` as both target and nodeName."""
        state = self.__dict__
        state['target'] = value
        state['nodeName'] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write `indent`, the instruction markup and `newl` to `writer`.

        `addindent` is accepted for signature compatibility and unused.
        """
        markup = "%s<?%s %s?>%s" % (indent, self.target, self.data, newl)
        writer.write(markup)
932
933
class CharacterData(Childless, Node):
    """Base class for nodes whose content is a single string.

    The string is stored under both 'data' and 'nodeValue' in the
    instance dictionary; assigning to either attribute updates both.
    Offsets and counts taken by the editing methods index into that
    string.  An offset equal to the current length is valid (it denotes
    the end of the data), as laid down by the DOM CharacterData
    interface; only negative offsets and offsets greater than the length
    raise xml.dom.IndexSizeErr.
    """

    def _get_length(self):
        """Return the number of characters in the data."""
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self.__dict__['data']
    def _set_data(self, data):
        d = self.__dict__
        d['data'] = d['nodeValue'] = data

    _get_nodeValue = _get_data
    _set_nodeValue = _set_data

    def __setattr__(self, name, value):
        # 'data' and 'nodeValue' are aliases and are always kept in sync.
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        else:
            self.__dict__[name] = value

    def __repr__(self):
        data = self.data
        # Only the first ten characters are shown; longer data is elided.
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_range(self, offset, count=0):
        """Raise xml.dom.IndexSizeErr for an invalid offset or count.

        `offset` may lie anywhere from 0 up to and including the length
        of the data; `count` must not be negative.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")

    def substringData(self, offset, count):
        """Return up to `count` characters starting at `offset`.

        Raises xml.dom.IndexSizeErr for a negative or too-large offset
        or a negative count.
        """
        self._check_range(offset, count)
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append the string `arg` to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert the string `arg` at `offset`.

        Raises xml.dom.IndexSizeErr for a negative or too-large offset.
        Inserting at the very end (offset == length) is allowed.
        """
        self._check_range(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Remove up to `count` characters starting at `offset`.

        Raises xml.dom.IndexSizeErr for a negative or too-large offset
        or a negative count.
        """
        self._check_range(offset, count)
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to `count` characters at `offset` with `arg`.

        A `count` of zero replaces nothing and therefore inserts `arg`
        at `offset`.  Raises xml.dom.IndexSizeErr for a negative or
        too-large offset or a negative count.
        """
        self._check_range(offset, count)
        # Skip the write only when nothing would change at all.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])

defproperty(CharacterData, "length", doc="Length of the string data.")
1006
1007
class Text(CharacterData):
    """A text node.

    Besides the CharacterData editing methods this adds splitText() and
    the DOM Level 3 helpers wholeText, replaceWholeText() and
    isWhitespaceInElementContent.
    """

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, CharacterData is an old-style class
    # __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Cut this node at `offset` and return the new trailing node.

        This node keeps data[:offset]; the returned node (an instance of
        the same class) holds data[offset:].  When this node is listed
        among its parent's children, the new node is placed right after
        it.  Raises xml.dom.IndexSizeErr for an offset outside
        0..len(data).
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        head = self.data[:offset]
        tail = self.__class__()
        tail.data = self.data[offset:]
        tail.ownerDocument = self.ownerDocument
        following = self.nextSibling
        parent = self.parentNode
        if parent and self in parent.childNodes:
            if following is None:
                parent.appendChild(tail)
            else:
                parent.insertBefore(tail, following)
        self.data = head
        return tail

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Hand `indent` + data + `newl` to _write_data() for `writer`."""
        payload = "%s%s%s" % (indent, self.data, newl)
        _write_data(writer, payload)

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return the joined data of this node and its adjacent
        text/CDATA siblings, in document order."""
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

        # Walk backwards; pieces are collected nearest-first and then
        # flipped into document order.
        before = []
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_types:
            before.append(sibling.data)
            sibling = sibling.previousSibling
        before.reverse()

        after = []
        sibling = self.nextSibling
        while sibling is not None and sibling.nodeType in text_types:
            after.append(sibling.data)
            sibling = sibling.nextSibling

        return ''.join(before + [self.data] + after)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings.

        All adjacent text and CDATA siblings are removed from the
        parent.  With non-empty `content` this node stays, takes
        `content` as its data, and is returned; otherwise this node is
        removed as well and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode

        node = self.previousSibling
        while node is not None and node.nodeType in text_types:
            earlier = node.previousSibling
            parent.removeChild(node)
            node = earlier

        # The forward neighbour has to be fetched before this node is
        # (possibly) taken out of the tree.
        node = self.nextSibling
        if not content:
            parent.removeChild(self)
        while node is not None and node.nodeType in text_types:
            later = node.nextSibling
            parent.removeChild(node)
            node = later

        if not content:
            return None
        state = self.__dict__
        state['data'] = content
        state['nodeValue'] = content
        return self

    def _get_isWhitespaceInElementContent(self):
        """Return the containing element's isElementContent() answer
        for whitespace-only data.

        False is returned when the data has non-whitespace characters,
        when there is no containing element, or when the owner document
        has no element info for that element.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        return info.isElementContent()

defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1103
1104
def _get_containing_element(node):
    """Return the nearest ancestor of `node` that is an element.

    The search starts at node.parentNode; None is returned when no
    ancestor is an element.
    """
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1112
def _get_containing_entref(node):
    """Return the nearest ancestor of `node` that is an entity reference.

    The search starts at node.parentNode; None is returned when no
    ancestor is an entity reference node.
    """
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1120
1121
1122class Comment(Childless, CharacterData):
1123    nodeType = Node.COMMENT_NODE
1124    nodeName = "#comment"
1125
1126    def __init__(self, data):
1127        self.data = self.nodeValue = data
1128
1129    def writexml(self, writer, indent="", addindent="", newl=""):
1130        if "--" in self.data:
1131            raise ValueError("'--' is not allowed in a comment node")
1132        writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
1133
1134
class CDATASection(Text):
    """A CDATA section node, serialized as <![CDATA[data]]>."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section markup to `writer`.

        Raises ValueError when the data contains "]]>".  `indent`,
        `addindent` and `newl` are accepted but not written.
        """
        text = self.data
        if "]]>" in text:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % text)
1148
1149
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly and return the first match; all
    mutating NamedNodeMap methods raise
    xml.dom.NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals `name`, or None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both names, or None."""
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look a node up by nodeName or by (namespaceURI, localName).

        Raises KeyError, carrying the key exactly as given, when no
        node matches.
        """
        if isinstance(name_or_tuple, tuple):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Use the call form: the "raise KeyError, key" statement
            # would unpack a tuple key into separate exception
            # arguments and is not valid syntax on Python 3.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at `index`, or None when out of range.

        Negative indexes are treated as out of range rather than as
        offsets from the end.
        """
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    # The class uses __slots__, so the pickled state is spelled out
    # explicitly as a one-element list holding the sequence.
    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]

defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1214
1215
class Identified:
    """Mix-in that supplies the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers on the instance."""
        self.systemId = systemId
        self.publicId = publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId
1231
class DocumentType(Identified, Childless, Node):
    """A document type (<!DOCTYPE ...>) node.

    `entities` and `notations` are read-only named node maps; `name`
    (also exposed as nodeName) is the local part of the qualified name
    handed to the constructor, or None when no name was given.
    """

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part is kept; the prefix is discarded.
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this node, or None if it belongs to a document.

        Only the name is copied onto the clone itself.  With a true
        `deep`, fresh Notation and Entity objects are created for the
        clone as well.  User-data handlers are notified with NODE_CLONED
        for every copied notation and entity and for the node itself.
        """
        if self.ownerDocument is not None:
            return None

        clone = DocumentType(None)
        clone.name = self.name
        clone.nodeName = self.name
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in self.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                clone.notations._seq.append(notation)
                n._call_user_data_handler(operation, n, notation)
            for e in self.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                clone.entities._seq.append(entity)
                # The source of this clone is the entity itself, not
                # the variable left over from the notation loop.
                e._call_user_data_handler(operation, e, entity)
        self._call_user_data_handler(operation, self, clone)
        return clone

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the <!DOCTYPE ...> declaration to `writer`.

        A PUBLIC clause is written when publicId is set, otherwise a
        SYSTEM clause when systemId is set; the internal subset follows
        in square brackets when it is not None.  `indent` and
        `addindent` are accepted but not written.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s  PUBLIC '%s'%s  '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1291
class Entity(Identified, Node):
    """An entity node.

    The node owns a childNodes list, but every method that would change
    the children raises xml.dom.HierarchyRequestErr.
    """

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self._identified_mixin_init(publicId, systemId)
        self.childNodes = NodeList()
        self.notationName = notation
        self.nodeName = name

    # -- accessors ---------------------------------------------------

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    # -- child mutation is always refused ----------------------------

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1331
class Notation(Identified, Childless, Node):
    """A notation node: a name plus its public and system identifiers."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1339
1340
class DOMImplementation(DOMImplementationLS):
    """Factory for Document and DocumentType objects.

    `_features` lists the (feature, version) pairs that hasFeature()
    reports as supported; a version of None stands for "any version".
    """

    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return True when (`feature`, `version`) is a listed feature.

        The feature name is compared case-insensitively and an empty
        version string is treated like None.
        """
        if version == "":
            version = None
        wanted = (feature.lower(), version)
        return wanted in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a doctype and root element.

        When all three arguments are None an empty document is returned.
        Otherwise a root element is created from `namespaceURI` and
        `qualifiedName`.

        Raises xml.dom.WrongDocumentErr if `doctype` already has a
        parent, xml.dom.InvalidCharacterErr if a root element is wanted
        but `qualifiedName` is empty, and xml.dom.NamespaceErr for an
        'xml' prefix bound to the wrong namespace or for a prefix given
        without a namespace.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        # DOM Level III: a document without a document element is only
        # returned when namespaceURI, qualifiedName and doctype are all
        # null.
        add_root_element = (namespaceURI is not None
                            or qualifiedName is not None
                            or doctype is not None)

        if add_root_element:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces
                # raises InvalidCharacterErr, and since SyntaxErr is not
                # listed for createDocument, that seems to be the better
                # choice.
                # XXX: need to check for illegal characters here and in
                # createElement.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            if prefix == "xml" \
               and namespaceURI != "http://www.w3.org/XML/1998/namespace":
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            element = doc.createElementNS(namespaceURI, qualifiedName)
            # The doctype, when there is one, goes in front of the root.
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(element)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if `feature` is supported, else None."""
        if not self.hasFeature(feature, None):
            return None
        return self

    # internal
    def _create_document(self):
        return Document()
1419
class ElementInfo(object):
    """Content-model information for one element type.

    This default implementation knows nothing beyond the tag name: it
    reports no attribute types, no ID attributes, and neither element
    nor EMPTY content.  It is not expected to be used in practice; DOM
    builders should supply implementations that answer from the
    information available to them.
    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    # -- attribute information ---------------------------------------

    def getAttributeType(self, aname):
        """Return the module's "no type" object for any attribute."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the module's "no type" object for any attribute."""
        return _no_type

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # -- content model -----------------------------------------------

    def isElementContent(self):
        """Return False unconditionally in this default implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    # -- pickling: the tag name is the complete state ----------------

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1461
def _clear_id_cache(node):
    """Drop the cached ID lookups of the document that `node` is part of.

    A document node is cleared directly; any other node clears its
    ownerDocument, but only when _in_document() says the node is
    attached.  Nodes outside a document leave every cache untouched.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1469
class Document(Node, DocumentLS):
    # Node types accepted as direct children of a document; checked by
    # appendChild().
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None
    previousSibling = nextSibling = None

    # Class-level default, so this single DOMImplementation instance is
    # shared by every document that does not get its own
    # `implementation` attribute assigned.
    implementation = DOMImplementation()

    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    # getElementById() returns early when this is zero and no element
    # info has been recorded for the document.
    _magic_id_count = 0
1495
1496    def __init__(self):
1497        self.childNodes = NodeList()
1498        # mapping of (namespaceURI, localName) -> ElementInfo
1499        #        and tagName -> ElementInfo
1500        self._elem_info = {}
1501        self._id_cache = {}
1502        self._id_search_stack = None
1503
1504    def _get_elem_info(self, element):
1505        if element.namespaceURI:
1506            key = element.namespaceURI, element.localName
1507        else:
1508            key = element.tagName
1509        return self._elem_info.get(key)
1510
    def _get_actualEncoding(self):
        """Return the document's actualEncoding attribute."""
        return self.actualEncoding
1513
    def _get_doctype(self):
        """Return the document's doctype attribute."""
        return self.doctype
1516
    def _get_documentURI(self):
        """Return the document's documentURI attribute."""
        return self.documentURI
1519
    def _get_encoding(self):
        """Return the document's encoding attribute."""
        return self.encoding
1522
    def _get_errorHandler(self):
        """Return the document's errorHandler attribute."""
        return self.errorHandler
1525
    def _get_standalone(self):
        """Return the document's standalone attribute."""
        return self.standalone
1528
    def _get_strictErrorChecking(self):
        """Return the document's strictErrorChecking attribute."""
        return self.strictErrorChecking
1531
    def _get_version(self):
        """Return the document's version attribute."""
        return self.version
1534
1535    def appendChild(self, node):
1536        if node.nodeType not in self._child_node_types:
1537            raise xml.dom.HierarchyRequestErr(
1538                "%s cannot be child of %s" % (repr(node), repr(self)))
1539        if node.parentNode is not None:
1540            # This needs to be done before the next test since this
1541            # may *be* the document element, in which case it should
1542            # end up re-ordered to the end.
1543            node.parentNode.removeChild(node)
1544
1545        if node.nodeType == Node.ELEMENT_NODE \
1546           and self._get_documentElement():
1547            raise xml.dom.HierarchyRequestErr(
1548                "two document elements disallowed")
1549        return Node.appendChild(self, node)
1550
1551    def removeChild(self, oldChild):
1552        try:
1553            self.childNodes.remove(oldChild)
1554        except ValueError:
1555            raise xml.dom.NotFoundErr()
1556        oldChild.nextSibling = oldChild.previousSibling = None
1557        oldChild.parentNode = None
1558        if self.documentElement is oldChild:
1559            self.documentElement = None
1560
1561        return oldChild
1562
1563    def _get_documentElement(self):
1564        for node in self.childNodes:
1565            if node.nodeType == Node.ELEMENT_NODE:
1566                return node
1567
1568    def unlink(self):
1569        if self.doctype is not None:
1570            self.doctype.unlink()
1571            self.doctype = None
1572        Node.unlink(self)
1573
    def cloneNode(self, deep):
        """Return a deep copy of this document, or None if `deep` is false.

        The copy is created through self.implementation and receives
        this document's encoding, standalone and version values; every
        child is cloned with _clone_node().  User-data handlers are
        notified with NODE_CLONED once the copy is complete.
        """
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            # The clone is added to the list directly rather than
            # through clone.appendChild().
            clone.childNodes.append(childclone)
            # NOTE(review): this branch tests for DOCUMENT_NODE although
            # it asserts on documentElement; ELEMENT_NODE may have been
            # intended -- confirm before relying on it.
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone
1594
1595    def createDocumentFragment(self):
1596        d = DocumentFragment()
1597        d.ownerDocument = self
1598        return d
1599
1600    def createElement(self, tagName):
1601        e = Element(tagName)
1602        e.ownerDocument = self
1603        return e
1604
1605    def createTextNode(self, data):
1606        if not isinstance(data, StringTypes):
1607            raise TypeError, "node contents must be a string"
1608        t = Text()
1609        t.data = data
1610        t.ownerDocument = self
1611        return t
1612
1613    def createCDATASection(self, data):
1614        if not isinstance(data, StringTypes):
1615            raise TypeError, "node contents must be a string"
1616        c = CDATASection()
1617        c.data = data
1618        c.ownerDocument = self
1619        return c
1620
1621    def createComment(self, data):
1622        c = Comment(data)
1623        c.ownerDocument = self
1624        return c
1625
1626    def createProcessingInstruction(self, target, data):
1627        p = ProcessingInstruction(target, data)
1628        p.ownerDocument = self
1629        return p
1630
1631    def createAttribute(self, qName):
1632        a = Attr(qName)
1633        a.ownerDocument = self
1634        a.value = ""
1635        return a
1636
1637    def createElementNS(self, namespaceURI, qualifiedName):
1638        prefix, localName = _nssplit(qualifiedName)
1639        e = Element(qualifiedName, namespaceURI, prefix)
1640        e.ownerDocument = self
1641        return e
1642
1643    def createAttributeNS(self, namespaceURI, qualifiedName):
1644        prefix, localName = _nssplit(qualifiedName)
1645        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1646        a.ownerDocument = self
1647        a.value = ""
1648        return a
1649
1650    # A couple of implementation-specific helpers to create node types
1651    # not supported by the W3C DOM specs:
1652
1653    def _create_entity(self, name, publicId, systemId, notationName):
1654        e = Entity(name, publicId, systemId, notationName)
1655        e.ownerDocument = self
1656        return e
1657
1658    def _create_notation(self, name, publicId, systemId):
1659        n = Notation(name, publicId, systemId)
1660        n.ownerDocument = self
1661        return n
1662
    def getElementById(self, id):
        """Return the element that has an ID attribute equal to `id`, or None.

        Every ID value met during the search is stored in
        self._id_cache.  The traversal stack is kept in
        self._id_search_stack, so a later call continues from where an
        earlier one stopped instead of starting over; an empty stack
        means the whole tree has been searched already.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        # Nothing in the document can be an ID: no element info was
        # recorded and no attribute was flagged through setIdAttribute*().
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            stack = [self.documentElement]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                # No attribute of this node was flagged
                                # by hand, so stop scanning it.
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            # This was the node's only hand-flagged ID.
                            break
            elif node._magic_id_nodes:
                # No element info: only hand-flagged attributes count.
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                # Leave the rest of the stack for a later call.
                break
        return result
1720
1721    def getElementsByTagName(self, name):
1722        return _get_elements_by_tagName_helper(self, name, NodeList())
1723
1724    def getElementsByTagNameNS(self, namespaceURI, localName):
1725        return _get_elements_by_tagName_ns_helper(
1726            self, namespaceURI, localName, NodeList())
1727
1728    def isSupported(self, feature, version):
1729        return self.implementation.hasFeature(feature, version)
1730
1731    def importNode(self, node, deep):
1732        if node.nodeType == Node.DOCUMENT_NODE:
1733            raise xml.dom.NotSupportedErr("cannot import document nodes")
1734        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1735            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1736        return _clone_node(node, deep, self)
1737
1738    def writexml(self, writer, indent="", addindent="", newl="",
1739                 encoding = None):
1740        if encoding is None:
1741            writer.write('<?xml version="1.0" ?>'+newl)
1742        else:
1743            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
1744        for node in self.childNodes:
1745            node.writexml(writer, indent, addindent, newl)
1746
1747    # DOM Level 3 (WD 9 April 2002)
1748
1749    def renameNode(self, n, namespaceURI, name):
1750        if n.ownerDocument is not self:
1751            raise xml.dom.WrongDocumentErr(
1752                "cannot rename nodes from other documents;\n"
1753                "expected %s,\nfound %s" % (self, n.ownerDocument))
1754        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1755            raise xml.dom.NotSupportedErr(
1756                "renameNode() only applies to element and attribute nodes")
1757        if namespaceURI != EMPTY_NAMESPACE:
1758            if ':' in name:
1759                prefix, localName = name.split(':', 1)
1760                if (  prefix == "xmlns"
1761                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1762                    raise xml.dom.NamespaceErr(
1763                        "illegal use of 'xmlns' prefix")
1764            else:
1765                if (  name == "xmlns"
1766                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1767                      and n.nodeType == Node.ATTRIBUTE_NODE):
1768                    raise xml.dom.NamespaceErr(
1769                        "illegal use of the 'xmlns' attribute")
1770                prefix = None
1771                localName = name
1772        else:
1773            prefix = None
1774            localName = None
1775        if n.nodeType == Node.ATTRIBUTE_NODE:
1776            element = n.ownerElement
1777            if element is not None:
1778                is_id = n._is_id
1779                element.removeAttributeNode(n)
1780        else:
1781            element = None
1782        # avoid __setattr__
1783        d = n.__dict__
1784        d['prefix'] = prefix
1785        d['localName'] = localName
1786        d['namespaceURI'] = namespaceURI
1787        d['nodeName'] = name
1788        if n.nodeType == Node.ELEMENT_NODE:
1789            d['tagName'] = name
1790        else:
1791            # attribute node
1792            d['name'] = name
1793            if element is not None:
1794                element.setAttributeNode(n)
1795                if is_id:
1796                    element.setIdAttributeNode(n)
1797        # It's not clear from a semantic perspective whether we should
1798        # call the user data handlers for the NODE_RENAMED event since
1799        # we're re-using the existing node.  The draft spec has been
1800        # interpreted as meaning "no, don't call the handler unless a
1801        # new node is created."
1802        return n
1803
# Publish Document.documentElement through the defproperty() helper
# (star-imported from xml.dom.minicompat at the top of the file).
# NOTE(review): presumably this builds a property from an accessor
# defined on the class -- confirm against minicompat.
defproperty(
    Document,
    "documentElement",
    doc="Top-level element of this document.")
1806
1807
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    node             -- the node to copy
    deep             -- when true, children of elements and document
                        fragments are cloned recursively, and the
                        notations and entities of a document type are
                        copied as well
    newOwnerDocument -- the document used to create the clone

    Returns the clone.  Raises xml.dom.NotSupportedErr for node types
    that are not handled here.

    After cloning, the source node's user data handlers (when the node
    provides _call_user_data_handler) are invoked with NODE_CLONED if
    the node already belongs to newOwnerDocument, NODE_IMPORTED
    otherwise.
    """
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            # Carry the "specified" flag over to the freshly created
            # attribute node.
            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        # A document type is only ever copied into a different document,
        # so the operation is always an import.
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    # The source of this copy is the entity itself, not
                    # the loop variable left over from the notations.
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # be used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1888
1889
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the two-item list [prefix, localName] when the name
    contains a colon, otherwise the tuple (None, qualifiedName-part).
    """
    parts = qualifiedName.split(':', 1)
    if len(parts) != 2:
        # No colon: there is no prefix.
        return (None, parts[0])
    return parts
1896
1897
def _get_StringIO():
    """Return a new, empty in-memory buffer for building XML output.

    The pure-Python StringIO.StringIO is used whenever it can be
    imported; io.StringIO serves as a fallback on interpreters that no
    longer ship the legacy StringIO module.
    """
    # we can't use cStringIO since it doesn't support Unicode strings
    try:
        from StringIO import StringIO
    except ImportError:
        # The legacy module is unavailable; io.StringIO offers the
        # same write()/getvalue() interface for text.
        from io import StringIO
    return StringIO()
1902
def _do_pulldom_parse(func, args, kwargs):
    """Build a complete tree from a pulldom-style event source.

    `func` is called with `args` and `kwargs` to obtain the event
    stream.  The node delivered with the stream's first event is
    expanded in full, the stream is cleared, and that node is returned
    (presumably the document node -- confirm against pulldom).
    """
    stream = func(*args, **kwargs)
    _event_type, root = stream.getEvent()
    stream.expandNode(root)
    stream.clear()
    return root
1909
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    When a `parser` is supplied or `bufsize` is true, the document is
    built through pulldom with those settings; otherwise the
    expatbuilder module does the parsing.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1919
def parseString(string, parser=None):
    """Parse an XML document held in a string into a DOM.

    Without a `parser` the expatbuilder module does the work; with one,
    the document is built through pulldom using that parser.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1929
def getDOMImplementation(features=None):
    """Return minidom's DOM implementation if it has all `features`.

    `features` may be false (no requirements), a feature string, which
    is parsed with domreg._parse_feature_string(), or an iterable of
    (feature, version) pairs.  Returns Document.implementation when
    every requested pair is reported by hasFeature(), otherwise None.
    """
    if not features:
        return Document.implementation
    if isinstance(features, StringTypes):
        features = domreg._parse_feature_string(features)
    implementation = Document.implementation
    for feature, version in features:
        if not implementation.hasFeature(feature, version):
            return None
    return implementation
1938