1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14        interface
15 * SAX 2 namespaces
16"""
17
18import xml.dom
19
20from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
21from xml.dom.minicompat import *
22from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
23
24# This is used by the ID-cache invalidation checks; the list isn't
25# actually complete, since the nodes being checked will never be the
26# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
27# the node being added or removed, not the node being modified.)
28#
29_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
30                            xml.dom.Node.ENTITY_REFERENCE_NODE)
31
32
class Node(xml.dom.Node):
    """Common base for minidom nodes.

    Holds the tree-link attributes (parent, siblings, owner document) and
    implements child management on top of a ``childNodes`` list and a
    ``_child_node_types`` tuple, both of which are read here but supplied
    by subclasses.
    """

    # Non-null only for elements and attributes.
    namespaceURI = None
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    # Non-null only for namespace-aware elements and attributes.
    prefix = EMPTY_PREFIX

    def __nonzero__(self):
        """A node is always considered true."""
        return True

    def toxml(self, encoding = None):
        """Serialize via toprettyxml() with empty indent and newline."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return this node serialized by writexml() as a string.

        indent is the string prepended once per nesting level, newl the
        string appended at line ends.  When encoding is given, output is
        routed through that codec's stream writer.
        """
        out = _get_StringIO()
        if encoding is not None:
            import codecs
            # codecs.getwriter is avoided to preserve 2.0 compatibility
            make_stream_writer = codecs.lookup(encoding)[3]
            out = make_stream_writer(out)
        if self.nodeType != Node.DOCUMENT_NODE:
            self.writexml(out, "", indent, newl)
        else:
            # Only a document accepts the encoding, so that it can put
            # it into the XML header.
            self.writexml(out, "", indent, newl, encoding)
        return out.getvalue()

    def hasChildNodes(self):
        """Return True when childNodes is non-empty, else False."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if not self.childNodes:
            return None
        return self.childNodes[0]

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if not self.childNodes:
            return None
        return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild in front of refChild (append if refChild is None).

        A document fragment has each of its children inserted instead.
        Raises HierarchyRequestErr for a disallowed node type and
        NotFoundErr when refChild is not one of this node's children.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Walk a snapshot: every insertion may detach the child from
            # the fragment's own child list.
            for fragChild in tuple(newChild.childNodes):
                self.insertBefore(fragChild, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
            return newChild
        try:
            position = self.childNodes.index(refChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if newChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        self.childNodes.insert(position, newChild)
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if position:
            before = self.childNodes[position-1]
            before.nextSibling = newChild
            newChild.previousSibling = before
        else:
            newChild.previousSibling = None
        newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment has each of its children appended instead.
        Raises HierarchyRequestErr for a disallowed node type.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for fragChild in tuple(node.childNodes):
                self.appendChild(fragChild)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's place and return oldChild.

        Returns None when both arguments are the same node.  Raises
        HierarchyRequestErr for a disallowed node type and NotFoundErr
        when oldChild is not one of this node's children.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Remove the old child, then insert the fragment's children
            # where it used to be.
            successor = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, successor)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            position = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[position] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        # Hand the old child's sibling links over to the new child.
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr when oldChild is not one of this node's children.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Splice the neighbours together around the removed node.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Element children are normalized recursively.
        """
        kept = []
        for child in self.childNodes:
            if child.nodeType != Node.TEXT_NODE:
                kept.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
                continue
            if not child.data:
                # empty text node; discard it and bridge its neighbours
                if kept:
                    kept[-1].nextSibling = child.nextSibling
                if child.nextSibling:
                    child.nextSibling.previousSibling = child.previousSibling
                child.unlink()
            elif kept and kept[-1].nodeType == child.nodeType:
                # fold this text node into the preceding text node
                target = kept[-1]
                target.data = target.data + child.data
                target.nextSibling = child.nextSibling
                if child.nextSibling:
                    child.nextSibling.previousSibling = target
                child.unlink()
            else:
                kept.append(child)
        self.childNodes[:] = kept

    def cloneNode(self, deep):
        """Clone via _clone_node(), owned by ownerDocument (or self)."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about feature/version."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True when other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self when feature is supported (any version), else None."""
        if not self.isSupported(feature, None):
            return None
        return self

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None when there is none."""
        try:
            entry = self._user_data[key]
            return entry[0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key; return the previous data or None.

        Passing data=None removes an existing entry; the handler is then
        ignored.
        """
        try:
            store = self._user_data
        except AttributeError:
            store = self._user_data = {}
        previous = None
        if key in store:
            previous = store[key][0]
        if data is not None:
            store[key] = (data, handler)
        elif previous is not None:
            del store[key]
        return previous

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke handle() on every stored user-data handler that is set."""
        if not hasattr(self, "_user_data"):
            return
        for key, (data, handler) in self._user_data.items():
            if handler is None:
                continue
            handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear this node's links and recursively unlink its children."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = self.nextSibling = None
269
# Register firstChild, lastChild and localName on Node through defproperty.
# NOTE(review): defproperty is not defined in this file; presumably it comes
# from the xml.dom.minicompat star import and hooks up the matching _get_*
# methods defined on the class -- confirm there.
defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
273
274
275def _append_child(self, node):
276    # fast path with less checks; usable by DOM builders if careful
277    childNodes = self.childNodes
278    if childNodes:
279        last = childNodes[-1]
280        node.__dict__["previousSibling"] = last
281        last.__dict__["nextSibling"] = node
282    childNodes.append(node)
283    node.__dict__["parentNode"] = self
284
285def _in_document(node):
286    # return True iff node is part of a document tree
287    while node is not None:
288        if node.nodeType == Node.DOCUMENT_NODE:
289            return True
290        node = node.parentNode
291    return False
292
293def _write_data(writer, data):
294    "Writes datachars to writer."
295    if data:
296        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
297                    replace("\"", "&quot;").replace(">", "&gt;")
298        writer.write(data)
299
300def _get_elements_by_tagName_helper(parent, name, rc):
301    for node in parent.childNodes:
302        if node.nodeType == Node.ELEMENT_NODE and \
303            (name == "*" or node.tagName == name):
304            rc.append(node)
305        _get_elements_by_tagName_helper(node, name, rc)
306    return rc
307
308def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
309    for node in parent.childNodes:
310        if node.nodeType == Node.ELEMENT_NODE:
311            if ((localName == "*" or node.localName == localName) and
312                (nsURI == "*" or node.namespaceURI == nsURI)):
313                rc.append(node)
314            _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
315    return rc
316
class DocumentFragment(Node):
    """Parentless node that only carries a list of child nodes."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = attributes = parentNode = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Each fragment gets its own, initially empty, child list.
        self.childNodes = NodeList()
333
334
class Attr(Node):
    """Attribute node.

    The attribute keeps ``name``/``nodeName`` and ``value``/``nodeValue``
    as aliased pairs and owns a single child Text node that mirrors the
    value.
    """
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        # Fill the instance dict directly; this skips __setattr__ for
        # performance.  localName is accepted but not stored here.
        state = self.__dict__
        state["nodeName"] = state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        state['childNodes'] = NodeList()

        # The single child node represents the value of the attr.
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        """Return the part of nodeName after the first ':' (or all of it)."""
        return self.nodeName.split(":", 1)[-1]

    def _get_name(self):
        return self.name

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        """Keep aliased attributes in step and invalidate the ID cache.

        Setting value/nodeValue also updates the child text node; setting
        name/nodeName updates both aliases.  In those two cases the owner
        element's ID cache is cleared.  Anything else is stored as is.
        """
        state = self.__dict__
        if name in ("value", "nodeValue"):
            state["value"] = state["nodeValue"] = value
            text_state = self.childNodes[0].__dict__
            text_state["data"] = text_state["nodeValue"] = value
        elif name in ("name", "nodeName"):
            state["name"] = state["nodeName"] = value
        else:
            state[name] = value
            return
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    def _set_prefix(self, prefix):
        """Store prefix and rebuild name/nodeName from it and localName.

        Raises NamespaceErr when the 'xmlns' prefix is used with a
        namespace other than XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns" and nsuri and nsuri != XMLNS_NAMESPACE:
            raise xml.dom.NamespaceErr(
                "illegal use of 'xmlns' prefix for the wrong namespace")
        state = self.__dict__
        state['prefix'] = prefix
        # localName is read before nodeName is replaced below.
        if prefix is None:
            qualified = self.localName
        else:
            qualified = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        state['nodeName'] = state['name'] = qualified

    def _set_value(self, value):
        """Store value under both aliases and in the child text node."""
        state = self.__dict__
        state['value'] = state['nodeValue'] = value
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.childNodes[0].data = value

    def unlink(self):
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        owner = self.ownerElement
        if owner is not None:
            # Drop this attribute from both of the owner's indexes.
            del owner._attrs[self.nodeName]
            del owner._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                owner._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        """Return whether this attribute is an ID.

        True when explicitly flagged; otherwise the owner document's
        element info for the owner element is consulted, and False is
        returned when document, element or info is missing.
        """
        if self._is_id:
            return True
        doc, elem = self.ownerDocument, self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        return info.isId(self.nodeName)

    def _get_schemaType(self):
        """Return the attribute's type from the owner document's element
        info, or _no_type when document, element or info is missing."""
        doc, elem = self.ownerDocument, self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        return info.getAttributeType(self.nodeName)
451
# Register isId, localName and schemaType on Attr through defproperty.
# NOTE(review): defproperty is not defined in this file; presumably it comes
# from the xml.dom.minicompat star import and hooks up the matching _get_*
# methods defined on the class -- confirm there.
defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
455
456
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.

    String keys address attributes by qualified name (``_attrs``);
    tuple keys address them by ``(namespaceURI, localName)``
    (``_attrsNS``).
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        """Wrap the two attribute dictionaries of ownerElement."""
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position index, or None if out of range."""
        try:
            # list() keeps this working when keys() is not indexable.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def has_key(self, key):
        """Return whether key names an attribute.

        Strings are looked up by qualified name, anything else in the
        (namespaceURI, localName) index.
        """
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    # Support ``key in map``.  Without this, the ``in`` operator falls
    # back to probing __getitem__ with integers, which raises KeyError.
    __contains__ = has_key

    def keys(self):
        """Return the qualified names of all attributes."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys of all attributes."""
        return self._attrsNS.keys()

    def values(self):
        """Return all attribute nodes."""
        return self._attrs.values()

    def get(self, name, value=None):
        """Return the attribute node called name, or value if absent."""
        return self._attrs.get(name, value)

    __len__ = _get_length

    __hash__ = None # Mutable type can't be correctly hashed
    def __cmp__(self, other):
        """Compare equal when both maps share the same _attrs dictionary;
        otherwise order by object identity."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    def __getitem__(self, attname_or_tuple):
        """Return the attribute node for a qualified name or for a
        (namespaceURI, localName) tuple; raises KeyError if absent."""
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or install an Attr node.

        A string updates the attribute called attname, creating it when
        missing.  An Attr instance is installed via setNamedItem().
        Any other value raises TypeError.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                # Call form of raise: valid on both Python 2 and Python 3.
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute node called name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute node for (namespaceURI, localName), or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called name.

        Raises NotFoundErr when there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            # Only reset ownerElement when it is stored on the instance.
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute for (namespaceURI, localName).

        Raises NotFoundErr when there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            # Only reset ownerElement when it is stored on the instance.
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Install node in both indexes and return the node it replaced.

        The replaced node (looked up by name) is unlinked first.  Raises
        HierarchyRequestErr when node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        """Same as setNamedItem()."""
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        """Unlink the attribute addressed by name or by tuple key."""
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state
605
# Register "length" on NamedNodeMap through defproperty.
# NOTE(review): defproperty is not defined in this file; presumably it comes
# from the xml.dom.minicompat star import and hooks up _get_length -- confirm.
defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

# AttributeList is a second name bound to the same class.
AttributeList = NamedNodeMap
610
611
612class TypeInfo(object):
613    __slots__ = 'namespace', 'name'
614
615    def __init__(self, namespace, name):
616        self.namespace = namespace
617        self.name = name
618
619    def __repr__(self):
620        if self.namespace:
621            return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
622        else:
623            return "<TypeInfo %r>" % self.name
624
625    def _get_name(self):
626        return self.name
627
628    def _get_namespace(self):
629        return self.namespace
630
631_no_type = TypeInfo(None, None)
632
class Element(Node):
    """Element node: a tag name, child nodes and a set of attributes.

    Attributes are kept in two dictionaries that index the same Attr
    objects: ``_attrs`` by qualified name and ``_attrsNS`` by
    ``(namespaceURI, localName)``.
    """
    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an element called tagName without children or attributes.

        localName is accepted but not stored; _get_localName() derives
        the local name from tagName.
        """
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def _get_localName(self):
        """Return the part of tagName after the first ':' (or all of it)."""
        return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink all attributes, drop both attribute indexes, then unlink
        this node and its children through Node.unlink()."""
        # Attr.unlink() deletes the attribute from self._attrs, so walk
        # a snapshot instead of the live dictionary values.
        for attr in list(self._attrs.values()):
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the value of attribute attname, or "" when absent."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the (namespaceURI, localName) attribute,
        or "" when absent."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set attribute attname to value, creating the Attr if needed.

        An existing attribute is only touched when the value differs;
        the ID cache is cleared if that attribute is an ID.
        """
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            # for performance: bypass Attr.__setattr__
            _store_attr_value(attr, value)
            attr.__dict__["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            _store_attr_value(attr, value)
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set the namespaced attribute qualifiedName to value.

        The attribute is looked up by (namespaceURI, local part of
        qualifiedName) and created when missing.  For an existing
        attribute the value is updated when it differs, and prefix and
        nodeName are updated when the prefix differs.
        """
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            # for performance: bypass Attr.__setattr__
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            d = attr.__dict__
            d["prefix"] = prefix
            d["nodeName"] = qualifiedName
            _store_attr_value(attr, value)
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            d = attr.__dict__
            if value != attr.value:
                _store_attr_value(attr, value)
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                d["prefix"] = prefix
                d["nodeName"] = qualifiedName

    def getAttributeNode(self, attrname):
        """Return the Attr called attrname, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the Attr for (namespaceURI, localName), or None."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element, replacing clashing attributes.

        Attributes already registered under the same qualified name or
        the same (namespaceURI, localName) are removed first.  Returns
        the node that was registered under the same qualified name when
        it is a different object than attr (None if there was none).
        Raises InuseAttributeErr when attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the attribute called name; NotFoundErr when absent."""
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the (namespaceURI, localName) attribute; NotFoundErr
        when absent."""
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink the attribute node from this element.

        Raises NotFoundErr when node is None or no attribute is
        registered under node.name.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        """Return whether an attribute called name exists."""
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        """Return whether a (namespaceURI, localName) attribute exists."""
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return a NodeList of descendant elements matching name
        ("*" matches every element)."""
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a NodeList of descendant elements matching namespaceURI
        and localName ("*" is a wildcard for either)."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element and its subtree to writer.

        indent is the current indentation, addindent the indentation
        added per nesting level and newl the newline string.  Attributes
        are written sorted by name.  An element whose only child is a
        text node is written on a single line.
        """
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        # sorted() copies, so this works whatever keys() returns.
        a_names = sorted(attrs.keys())

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType == Node.TEXT_NODE):
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        """Return a fresh NamedNodeMap view over this element's attributes."""
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True when the element has at least one attribute."""
        return bool(self._attrs)

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        """Flag the attribute called name as an ID attribute."""
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        """Flag the (namespaceURI, localName) attribute as an ID attribute."""
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Flag idAttr as an ID attribute of this element.

        Raises NotFoundErr when idAttr is None or not owned by this
        element, and NoModificationAllowedErr when
        _get_containing_entref() finds a containing entity reference.
        An attribute that is already flagged is left alone.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr.__dict__['_is_id'] = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)


def _store_attr_value(attr, value):
    # Fast equivalent of the value bookkeeping in Attr.__setattr__:
    # store value under both aliases and mirror it into the attribute's
    # child text node, without the unconditional ID-cache clearing that
    # the regular assignment would trigger.
    d = attr.__dict__
    d["value"] = d["nodeValue"] = value
    if attr.childNodes:
        d2 = attr.childNodes[0].__dict__
        d2["data"] = d2["nodeValue"] = value
852
# Register attributes and localName on Element through defproperty.
# NOTE(review): defproperty is not defined in this file; presumably it comes
# from the xml.dom.minicompat star import and hooks up the matching _get_*
# methods defined on the class -- confirm there.
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
857
858
def _set_attribute_node(element, attr):
    # Register attr in both of the element's attribute indexes and make
    # the element its owner.  Nothing already registered is removed here.
    _clear_id_cache(element)
    by_name = element._attrs
    by_ns = element._attrsNS
    by_name[attr.name] = attr
    by_ns[(attr.namespaceURI, attr.localName)] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr.__dict__['ownerElement'] = element
868
869
class Childless:
    """Mixin for node types that never have children.

    It supplies empty child attributes and replaces the child-handling
    methods of Node with versions that either do nothing or raise.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def hasChildNodes(self):
        return False

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        pass

    def appendChild(self, node):
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)
908
909
class ProcessingInstruction(Childless, Node):
    """Processing instruction node.

    `target` doubles as `nodeName` and `data` doubles as `nodeValue`;
    __setattr__ keeps each pair in step whichever name is assigned.
    """

    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        # __setattr__ fills in nodeName / nodeValue alongside these.
        self.target = target
        self.data = data

    def _get_data(self):
        return self.data

    def _set_data(self, value):
        state = self.__dict__
        state['data'] = state['nodeValue'] = value

    def _get_target(self):
        return self.target

    def _set_target(self, value):
        state = self.__dict__
        state['target'] = state['nodeName'] = value

    def __setattr__(self, name, value):
        state = self.__dict__
        if name in ("data", "nodeValue"):
            state['data'] = state['nodeValue'] = value
        elif name in ("target", "nodeName"):
            state['target'] = state['nodeName'] = value
        else:
            state[name] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        # addindent is accepted for signature compatibility; it is unused.
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
939
940
class CharacterData(Childless, Node):
    """Shared behaviour for nodes that carry string data.

    `data` and `nodeValue` are two names for the same string; both are
    stored in the instance dictionary and kept in step by __setattr__.

    The offset-taking methods raise xml.dom.IndexSizeErr for a negative
    offset, a negative count, or an offset greater than the length of the
    data.  An offset equal to the length is valid (it addresses the end of
    the data), which matches Text.splitText() and lets insertData() work
    on an empty node.
    """

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self.__dict__['data']
    def _set_data(self, data):
        d = self.__dict__
        d['data'] = d['nodeValue'] = data

    _get_nodeValue = _get_data
    _set_nodeValue = _set_data

    def __setattr__(self, name, value):
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        else:
            self.__dict__[name] = value

    def __repr__(self):
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_offset_and_count(self, offset, count=0):
        # Shared argument validation for the offset-based methods below.
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")

    def substringData(self, offset, count):
        """Return up to `count` characters of the data starting at `offset`."""
        self._check_offset_and_count(offset, count)
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append the string `arg` to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert the string `arg` into the data at `offset`."""
        self._check_offset_and_count(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Remove up to `count` characters of the data starting at `offset`."""
        self._check_offset_and_count(offset, count)
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to `count` characters at `offset` with `arg`.

        A zero `count` replaces nothing and therefore inserts `arg`.
        """
        self._check_offset_and_count(offset, count)
        # Skip the rebuild only when it could not change anything.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])
1011
# Register the "length" property on CharacterData.
# NOTE(review): defproperty is presumably supplied by the
# xml.dom.minicompat star import -- confirm.
defproperty(CharacterData, "length", doc="Length of the string data.")
1013
1014
class Text(CharacterData):
    """Text node, with the DOM Level 3 whole-text helpers."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, CharacterData is an old-style class
    # __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at `offset` and return the new trailing node.

        This node keeps data[:offset]; the returned node holds the rest
        and, when this node is attached to a parent, is inserted right
        after it.  Raises xml.dom.IndexSizeErr for an offset outside
        0..len(data).
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        next = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if next is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, next)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return the data of this node joined with all adjacent text and
        CDATA siblings, in document order."""
        L = [self.data]
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.insert(0, n.data)
                n = n.previousSibling
            else:
                break
        n = self.nextSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.append(n.data)
                n = n.nextSibling
            else:
                break
        return ''.join(L)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings by `content`.

        Adjacent text and CDATA siblings are removed from the parent.
        With non-empty `content` this node receives it and is returned;
        with empty `content` this node is removed from its parent (if it
        has one) and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        parent = self.parentNode
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                next = n.previousSibling
                parent.removeChild(n)
                n = next
            else:
                break
        # Remember the following sibling before this node may be removed.
        n = self.nextSibling
        # A detached node has no parent to be removed from.
        if not content and parent is not None:
            parent.removeChild(self)
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                next = n.nextSibling
                parent.removeChild(n)
                n = next
            else:
                break
        if content:
            # Assigning data also updates nodeValue (CharacterData).
            self.data = content
            return self
        else:
            return None

    def _get_isWhitespaceInElementContent(self):
        """Return True only for whitespace-only text whose containing
        element is known to the document as having element content."""
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        else:
            return info.isElementContent()
1104
# Register the "isWhitespaceInElementContent" and "wholeText" properties
# on Text.
# NOTE(review): defproperty is presumably supplied by the
# xml.dom.minicompat star import -- confirm.
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1110
1111
def _get_containing_element(node):
    """Return the nearest ancestor of `node` that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1119
def _get_containing_entref(node):
    """Return the nearest ancestor of `node` that is an entity reference,
    or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1127
1128
1129class Comment(Childless, CharacterData):
1130    nodeType = Node.COMMENT_NODE
1131    nodeName = "#comment"
1132
1133    def __init__(self, data):
1134        self.data = self.nodeValue = data
1135
1136    def writexml(self, writer, indent="", addindent="", newl=""):
1137        if "--" in self.data:
1138            raise ValueError("'--' is not allowed in a comment node")
1139        writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
1140
1141
class CDATASection(Text):
    """CDATA section node; serialised as <![CDATA[data]]>."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        # indent, addindent and newl are ignored: the section is written
        # verbatim.  Data containing the terminator is refused.
        if "]]>" in self.data:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % self.data)
1155
1156
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly.  Every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals `name`, or None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace and local name, or None."""
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look up by node name, or by a (namespaceURI, localName) tuple.

        Raises KeyError carrying the requested key when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Call form: works on every Python version and keeps a tuple
            # key intact as the single exception argument.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at `index`, or None when out of range."""
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    # Explicit pickle support: the class uses __slots__.
    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1218
# Register the "length" property on ReadOnlySequentialNamedNodeMap.
# NOTE(review): defproperty is presumably supplied by the
# xml.dom.minicompat star import -- confirm.
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1221
1222
class Identified:
    """Mix-in providing the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        # Called by the concrete node classes from their own __init__.
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        return self.publicId

    def _get_systemId(self):
        return self.systemId
1238
class DocumentType(Identified, Childless, Node):
    """Document type declaration node.

    `entities` and `notations` are read-only named node maps; `name` (and
    `nodeName`) hold the local part of the qualified name passed in.
    """

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of the name is kept.
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this doctype, or None if it belongs to a document.

        Only a doctype without an owner document can be cloned.  The clone
        carries over the name only.  A deep clone also copies the notations
        and entities, notifying each source node's user-data handlers with
        NODE_CLONED.
        """
        if self.ownerDocument is None:
            # it's ok
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    # The source of this clone is the entity itself, not
                    # the last notation visited by the loop above.
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the <!DOCTYPE ...> declaration to `writer`.

        `indent` and `addindent` are not used; `newl` separates the
        identifier lines and terminates the declaration.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s  PUBLIC '%s'%s  '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1298
class Entity(Identified, Node):
    """Entity node.

    It owns a child list, but all four child-mutating methods raise
    xml.dom.HierarchyRequestErr.
    """

    nodeType = Node.ENTITY_NODE
    nodeValue = None
    attributes = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1338
class Notation(Identified, Childless, Node):
    """Notation node: a name plus public and system identifiers."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1346
1347
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and document types, plus feature queries."""

    # (feature, version) pairs accepted by hasFeature(); a version of
    # None stands for "any version".
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return True if (feature, version) is supported.

        The feature name is matched case-insensitively; an empty version
        string is treated like None.
        """
        if version == "":
            version = None
        wanted = (feature.lower(), version)
        return wanted in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document, optionally with a root element and doctype.

        When all three arguments are None the document is returned without
        a document element.  Otherwise a root element named
        `qualifiedName` is created.

        Raises xml.dom.WrongDocumentErr if `doctype` already has a parent,
        xml.dom.InvalidCharacterErr if a root element is required but no
        name was given, and xml.dom.NamespaceErr for an illegal prefix.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces raises
                # InvalidCharacterErr, and since SyntaxErr is not listed
                # for createDocument, that seems to be the better choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without a
                # document element.  Otherwise if doctype or namespaceURI
                # are not None then we go back to the above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            if (prefix == "xml"
                and namespaceURI != "http://www.w3.org/XML/1998/namespace"):
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            # The doctype has to come before the document element.
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a DocumentType with the given name and identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if `feature` is supported, else None."""
        if not self.hasFeature(feature, None):
            return None
        return self

    # internal
    def _create_document(self):
        return Document()
1426
class ElementInfo(object):
    """Content-model information for one element type.

    This default implementation knows nothing about the element and is
    not expected to be used in practice; DOM builders should supply their
    own implementation based on the information available to them.

    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        return _no_type

    def isElementContent(self):
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # Explicit pickle support: the class uses __slots__, and the tag name
    # is its whole state.
    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1468
def _clear_id_cache(node):
    """Reset the ID lookup state of the document `node` belongs to.

    `node` may be the document itself.  Nothing happens for a node that
    is not part of a document.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1476
1477class Document(Node, DocumentLS):
1478    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
1479                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
1480
1481    nodeType = Node.DOCUMENT_NODE
1482    nodeName = "#document"
1483    nodeValue = None
1484    attributes = None
1485    doctype = None
1486    parentNode = None
1487    previousSibling = nextSibling = None
1488
1489    implementation = DOMImplementation()
1490
1491    # Document attributes from Level 3 (WD 9 April 2002)
1492
1493    actualEncoding = None
1494    encoding = None
1495    standalone = None
1496    version = None
1497    strictErrorChecking = False
1498    errorHandler = None
1499    documentURI = None
1500
1501    _magic_id_count = 0
1502
    def __init__(self):
        # Every document starts with its own, empty child list.
        self.childNodes = NodeList()
        # mapping of (namespaceURI, localName) -> ElementInfo
        #        and tagName -> ElementInfo
        self._elem_info = {}
        # Cache of ID value -> element, filled in by getElementById().
        self._id_cache = {}
        # Work stack of getElementById(); None means no search has been
        # started since the cache was last cleared.
        self._id_search_stack = None
1510
1511    def _get_elem_info(self, element):
1512        if element.namespaceURI:
1513            key = element.namespaceURI, element.localName
1514        else:
1515            key = element.tagName
1516        return self._elem_info.get(key)
1517
    def _get_actualEncoding(self):
        """Accessor returning the `actualEncoding` attribute."""
        return self.actualEncoding
1520
    def _get_doctype(self):
        """Accessor returning the `doctype` attribute."""
        return self.doctype
1523
    def _get_documentURI(self):
        """Accessor returning the `documentURI` attribute."""
        return self.documentURI
1526
    def _get_encoding(self):
        """Accessor returning the `encoding` attribute."""
        return self.encoding
1529
    def _get_errorHandler(self):
        """Accessor returning the `errorHandler` attribute."""
        return self.errorHandler
1532
    def _get_standalone(self):
        """Accessor returning the `standalone` attribute."""
        return self.standalone
1535
    def _get_strictErrorChecking(self):
        """Accessor returning the `strictErrorChecking` attribute."""
        return self.strictErrorChecking
1538
    def _get_version(self):
        """Accessor returning the `version` attribute."""
        return self.version
1541
1542    def appendChild(self, node):
1543        if node.nodeType not in self._child_node_types:
1544            raise xml.dom.HierarchyRequestErr(
1545                "%s cannot be child of %s" % (repr(node), repr(self)))
1546        if node.parentNode is not None:
1547            # This needs to be done before the next test since this
1548            # may *be* the document element, in which case it should
1549            # end up re-ordered to the end.
1550            node.parentNode.removeChild(node)
1551
1552        if node.nodeType == Node.ELEMENT_NODE \
1553           and self._get_documentElement():
1554            raise xml.dom.HierarchyRequestErr(
1555                "two document elements disallowed")
1556        return Node.appendChild(self, node)
1557
1558    def removeChild(self, oldChild):
1559        try:
1560            self.childNodes.remove(oldChild)
1561        except ValueError:
1562            raise xml.dom.NotFoundErr()
1563        oldChild.nextSibling = oldChild.previousSibling = None
1564        oldChild.parentNode = None
1565        if self.documentElement is oldChild:
1566            self.documentElement = None
1567
1568        return oldChild
1569
1570    def _get_documentElement(self):
1571        for node in self.childNodes:
1572            if node.nodeType == Node.ELEMENT_NODE:
1573                return node
1574
1575    def unlink(self):
1576        if self.doctype is not None:
1577            self.doctype.unlink()
1578            self.doctype = None
1579        Node.unlink(self)
1580
    def cloneNode(self, deep):
        """Return a deep copy of the document.

        A shallow clone is not provided: None is returned when `deep` is
        false.  The clone is created through
        implementation.createDocument(), receives the encoding, standalone
        and version values of this document, and gets a clone of every
        child.  The NODE_CLONED user-data handlers of this document are
        called at the end.
        """
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            # Children are appended to the list directly rather than
            # through appendChild(); parentNode is set by hand below.
            clone.childNodes.append(childclone)
            # NOTE(review): a child clone having nodeType DOCUMENT_NODE
            # looks unexpected here; ELEMENT_NODE may have been intended
            # -- confirm before relying on this assertion.
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone
1601
1602    def createDocumentFragment(self):
1603        d = DocumentFragment()
1604        d.ownerDocument = self
1605        return d
1606
1607    def createElement(self, tagName):
1608        e = Element(tagName)
1609        e.ownerDocument = self
1610        return e
1611
1612    def createTextNode(self, data):
1613        if not isinstance(data, StringTypes):
1614            raise TypeError, "node contents must be a string"
1615        t = Text()
1616        t.data = data
1617        t.ownerDocument = self
1618        return t
1619
1620    def createCDATASection(self, data):
1621        if not isinstance(data, StringTypes):
1622            raise TypeError, "node contents must be a string"
1623        c = CDATASection()
1624        c.data = data
1625        c.ownerDocument = self
1626        return c
1627
1628    def createComment(self, data):
1629        c = Comment(data)
1630        c.ownerDocument = self
1631        return c
1632
1633    def createProcessingInstruction(self, target, data):
1634        p = ProcessingInstruction(target, data)
1635        p.ownerDocument = self
1636        return p
1637
1638    def createAttribute(self, qName):
1639        a = Attr(qName)
1640        a.ownerDocument = self
1641        a.value = ""
1642        return a
1643
1644    def createElementNS(self, namespaceURI, qualifiedName):
1645        prefix, localName = _nssplit(qualifiedName)
1646        e = Element(qualifiedName, namespaceURI, prefix)
1647        e.ownerDocument = self
1648        return e
1649
1650    def createAttributeNS(self, namespaceURI, qualifiedName):
1651        prefix, localName = _nssplit(qualifiedName)
1652        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1653        a.ownerDocument = self
1654        a.value = ""
1655        return a
1656
1657    # A couple of implementation-specific helpers to create node types
1658    # not supported by the W3C DOM specs:
1659
1660    def _create_entity(self, name, publicId, systemId, notationName):
1661        e = Entity(name, publicId, systemId, notationName)
1662        e.ownerDocument = self
1663        return e
1664
1665    def _create_notation(self, name, publicId, systemId):
1666        n = Notation(name, publicId, systemId)
1667        n.ownerDocument = self
1668        return n
1669
    def getElementById(self, id):
        """Return the element that has an ID attribute equal to `id`, or None.

        Results come from `_id_cache` when possible.  Otherwise the tree is
        searched using the stack kept in `_id_search_stack`; every ID
        attribute met on the way is added to the cache.  The search stops
        as soon as a match is found and leaves the remaining stack in
        place, so a later call continues from there instead of starting
        over.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        # Without element info and without any attribute flagged through
        # setIdAttribute*() there is nothing that could be an ID.
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            stack = [self.documentElement]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            break
            elif node._magic_id_nodes:
                # No element info: only attributes flagged as IDs count.
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                break
        return result
1727
1728    def getElementsByTagName(self, name):
1729        return _get_elements_by_tagName_helper(self, name, NodeList())
1730
1731    def getElementsByTagNameNS(self, namespaceURI, localName):
1732        return _get_elements_by_tagName_ns_helper(
1733            self, namespaceURI, localName, NodeList())
1734
1735    def isSupported(self, feature, version):
1736        return self.implementation.hasFeature(feature, version)
1737
1738    def importNode(self, node, deep):
1739        if node.nodeType == Node.DOCUMENT_NODE:
1740            raise xml.dom.NotSupportedErr("cannot import document nodes")
1741        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1742            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1743        return _clone_node(node, deep, self)
1744
1745    def writexml(self, writer, indent="", addindent="", newl="",
1746                 encoding = None):
1747        if encoding is None:
1748            writer.write('<?xml version="1.0" ?>'+newl)
1749        else:
1750            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
1751        for node in self.childNodes:
1752            node.writexml(writer, indent, addindent, newl)
1753
1754    # DOM Level 3 (WD 9 April 2002)
1755
1756    def renameNode(self, n, namespaceURI, name):
1757        if n.ownerDocument is not self:
1758            raise xml.dom.WrongDocumentErr(
1759                "cannot rename nodes from other documents;\n"
1760                "expected %s,\nfound %s" % (self, n.ownerDocument))
1761        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1762            raise xml.dom.NotSupportedErr(
1763                "renameNode() only applies to element and attribute nodes")
1764        if namespaceURI != EMPTY_NAMESPACE:
1765            if ':' in name:
1766                prefix, localName = name.split(':', 1)
1767                if (  prefix == "xmlns"
1768                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1769                    raise xml.dom.NamespaceErr(
1770                        "illegal use of 'xmlns' prefix")
1771            else:
1772                if (  name == "xmlns"
1773                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1774                      and n.nodeType == Node.ATTRIBUTE_NODE):
1775                    raise xml.dom.NamespaceErr(
1776                        "illegal use of the 'xmlns' attribute")
1777                prefix = None
1778                localName = name
1779        else:
1780            prefix = None
1781            localName = None
1782        if n.nodeType == Node.ATTRIBUTE_NODE:
1783            element = n.ownerElement
1784            if element is not None:
1785                is_id = n._is_id
1786                element.removeAttributeNode(n)
1787        else:
1788            element = None
1789        # avoid __setattr__
1790        d = n.__dict__
1791        d['prefix'] = prefix
1792        d['localName'] = localName
1793        d['namespaceURI'] = namespaceURI
1794        d['nodeName'] = name
1795        if n.nodeType == Node.ELEMENT_NODE:
1796            d['tagName'] = name
1797        else:
1798            # attribute node
1799            d['name'] = name
1800            if element is not None:
1801                element.setAttributeNode(n)
1802                if is_id:
1803                    element.setIdAttributeNode(n)
1804        # It's not clear from a semantic perspective whether we should
1805        # call the user data handlers for the NODE_RENAMED event since
1806        # we're re-using the existing node.  The draft spec has been
1807        # interpreted as meaning "no, don't call the handler unless a
1808        # new node is created."
1809        return n
1810
# Register the documented "documentElement" property on Document through
# the defproperty() helper (presumably the one pulled in from
# xml.dom.minicompat -- confirm against that module).
defproperty(
    Document,
    "documentElement",
    doc="Top-level element of this document.")
1813
1814
1815def _clone_node(node, deep, newOwnerDocument):
1816    """
1817    Clone a node and give it the new owner document.
1818    Called by Node.cloneNode and Document.importNode
1819    """
1820    if node.ownerDocument.isSameNode(newOwnerDocument):
1821        operation = xml.dom.UserDataHandler.NODE_CLONED
1822    else:
1823        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1824    if node.nodeType == Node.ELEMENT_NODE:
1825        clone = newOwnerDocument.createElementNS(node.namespaceURI,
1826                                                 node.nodeName)
1827        for attr in node.attributes.values():
1828            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
1829            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
1830            a.specified = attr.specified
1831
1832        if deep:
1833            for child in node.childNodes:
1834                c = _clone_node(child, deep, newOwnerDocument)
1835                clone.appendChild(c)
1836
1837    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
1838        clone = newOwnerDocument.createDocumentFragment()
1839        if deep:
1840            for child in node.childNodes:
1841                c = _clone_node(child, deep, newOwnerDocument)
1842                clone.appendChild(c)
1843
1844    elif node.nodeType == Node.TEXT_NODE:
1845        clone = newOwnerDocument.createTextNode(node.data)
1846    elif node.nodeType == Node.CDATA_SECTION_NODE:
1847        clone = newOwnerDocument.createCDATASection(node.data)
1848    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1849        clone = newOwnerDocument.createProcessingInstruction(node.target,
1850                                                             node.data)
1851    elif node.nodeType == Node.COMMENT_NODE:
1852        clone = newOwnerDocument.createComment(node.data)
1853    elif node.nodeType == Node.ATTRIBUTE_NODE:
1854        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
1855                                                   node.nodeName)
1856        clone.specified = True
1857        clone.value = node.value
1858    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1859        assert node.ownerDocument is not newOwnerDocument
1860        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1861        clone = newOwnerDocument.implementation.createDocumentType(
1862            node.name, node.publicId, node.systemId)
1863        clone.ownerDocument = newOwnerDocument
1864        if deep:
1865            clone.entities._seq = []
1866            clone.notations._seq = []
1867            for n in node.notations._seq:
1868                notation = Notation(n.nodeName, n.publicId, n.systemId)
1869                notation.ownerDocument = newOwnerDocument
1870                clone.notations._seq.append(notation)
1871                if hasattr(n, '_call_user_data_handler'):
1872                    n._call_user_data_handler(operation, n, notation)
1873            for e in node.entities._seq:
1874                entity = Entity(e.nodeName, e.publicId, e.systemId,
1875                                e.notationName)
1876                entity.actualEncoding = e.actualEncoding
1877                entity.encoding = e.encoding
1878                entity.version = e.version
1879                entity.ownerDocument = newOwnerDocument
1880                clone.entities._seq.append(entity)
1881                if hasattr(e, '_call_user_data_handler'):
1882                    e._call_user_data_handler(operation, n, entity)
1883    else:
1884        # Note the cloning of Document and DocumentType nodes is
1885        # implementation specific.  minidom handles those cases
1886        # directly in the cloneNode() methods.
1887        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
1888
1889    # Check for _call_user_data_handler() since this could conceivably
1890    # used with other DOM implementations (one of the FourThought
1891    # DOMs, perhaps?).
1892    if hasattr(node, '_call_user_data_handler'):
1893        node._call_user_data_handler(operation, node, clone)
1894    return clone
1895
1896
1897def _nssplit(qualifiedName):
1898    fields = qualifiedName.split(':', 1)
1899    if len(fields) == 2:
1900        return fields
1901    else:
1902        return (None, fields[0])
1903
1904
1905def _get_StringIO():
1906    # we can't use cStringIO since it doesn't support Unicode strings
1907    from StringIO import StringIO
1908    return StringIO()
1909
1910def _do_pulldom_parse(func, args, kwargs):
1911    events = func(*args, **kwargs)
1912    toktype, rootNode = events.getEvent()
1913    events.expandNode(rootNode)
1914    events.clear()
1915    return rootNode
1916
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    With neither a parser nor a (non-zero) buffer size given, the
    work is done by xml.dom.expatbuilder; otherwise xml.dom.pulldom
    is driven with the supplied parser and buffer size.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1926
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    Without a parser the work is done by xml.dom.expatbuilder;
    otherwise xml.dom.pulldom is driven with the supplied parser.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1936
def getDOMImplementation(features=None):
    """Return Document.implementation if it has all requested features.

    features -- nothing, a feature string (parsed with
                domreg._parse_feature_string()), or an iterable of
                (feature, version) pairs

    Returns None as soon as one requested feature is reported missing
    by the implementation's hasFeature(); with no features requested
    the implementation is returned unconditionally.
    """
    if not features:
        return Document.implementation
    if isinstance(features, StringTypes):
        features = domreg._parse_feature_string(features)
    implementation = Document.implementation
    for feature, version in features:
        if not implementation.hasFeature(feature, version):
            return None
    return implementation
1945