"""\
minidom.py -- a lightweight DOM implementation.

parse("foo.xml")

parseString("<foo><bar/></foo>")

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
        interface
 * SAX 2 namespaces
"""

import xml.dom

from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
from xml.dom.minicompat import *
from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS

# This is used by the ID-cache invalidation checks; the list isn't
# actually complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
# the node being added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)


class Node(xml.dom.Node):
    """Base class for the node types defined in this module.

    Provides the child-list manipulation methods, serialization entry
    points and the DOM Level 3 user-data helpers.  Subclasses supply
    ``nodeType``, ``childNodes``, ``_child_node_types`` and ``writexml``.
    """

    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __nonzero__(self):
        """A node is always true, even when it has no children or data."""
        return True

    # Python 3 consults __bool__ instead of __nonzero__; without this alias
    # subclasses that define __len__ would be false when empty.
    __bool__ = __nonzero__

    def toxml(self, encoding = None):
        """Return the node serialized without added indentation or newlines."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return the node serialized through ``self.writexml``.

        indent   -- the indentation string to prepend, per level
        newl     -- the newline string to append
        encoding -- when not None, the output is passed through the
                    stream writer of that codec
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if ``self.childNodes`` is non-empty, else False."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        """Return the first child, or None (implicitly) when childless."""
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        """Return the last child, or None (implicitly) when childless."""
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild immediately before refChild and return newChild.

        A document fragment has each of its children inserted in order.
        When refChild is None the node is appended.  Raises
        HierarchyRequestErr if the node type is not allowed here and
        NotFoundErr if refChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a copy: each insertion detaches the child
            # from the fragment's own child list.
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment has each of its children appended in order.
        Raises HierarchyRequestErr if the node type is not allowed here.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace oldChild with newChild and return oldChild.

        A fragment is handled by removing oldChild and inserting the
        fragment before oldChild's former next sibling; in that case the
        result of insertBefore is returned.  Returns None when newChild
        is oldChild.  Raises HierarchyRequestErr for a disallowed node
        type and NotFoundErr if oldChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        # Splice newChild into the sibling chain in oldChild's place.
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr if oldChild is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Element children are normalized recursively.  Discarded text
        nodes are unlinked and the sibling pointers of their neighbours
        are repaired.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        # Slice-assign so the existing child list object is kept.
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return ``_clone_node(self, deep, owner)``.

        The owner passed is ownerDocument, or the node itself when it
        has no owner document.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Delegate to ``ownerDocument.implementation.hasFeature``."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if ``isSupported(feature, None)``, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing data=None deletes an existing entry (when its data was
        not None) and ignores the handler.  Returns None when nothing
        was stored before.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Call ``handle`` on every non-None handler stored on this node."""
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear parent, owner and sibling links, recursively for children."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild", doc="Last child node, or None.")
defproperty(Node, "localName", doc="Namespace-local name of this node.")


def _append_child(self, node):
    """Append node to self.childNodes and link it to the previous last child.

    Writes straight into the instances' ``__dict__`` and does no
    validation; node.nextSibling is not touched.
    """
    # fast path with less checks; usable by DOM builders if careful
    childNodes = self.childNodes
    if childNodes:
        last = childNodes[-1]
        node.__dict__["previousSibling"] = last
        last.__dict__["nextSibling"] = node
    childNodes.append(node)
    node.__dict__["parentNode"] = self

def _in_document(node):
    # return True iff node is part of a document tree
    while node is not None:
        if node.nodeType == Node.DOCUMENT_NODE:
            return True
        node = node.parentNode
    return False

def _write_data(writer, data):
    "Writes datachars to writer."
    if data:
        # "&" must be escaped first so the entities produced by the
        # later replacements are not escaped a second time.
        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
                    replace("\"", "&quot;").replace(">", "&gt;")
        writer.write(data)

def _get_elements_by_tagName_helper(parent, name, rc):
    """Append to rc every descendant element whose tagName matches name.

    name "*" matches every element.  Walks the tree in document order
    and returns rc.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and \
            (name == "*" or node.tagName == name):
            rc.append(node)
        _get_elements_by_tagName_helper(node, name, rc)
    return rc

def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Append to rc every descendant element matching nsURI and localName.

    "*" acts as a wildcard for either argument.  Only element children
    are descended into.  Returns rc.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
    return rc

class DocumentFragment(Node):
    """Container node whose children are moved when it is inserted."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None
    _child_node_types = (Node.ELEMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.NOTATION_NODE)

    def __init__(self):
        self.childNodes = NodeList()


class Attr(Node):
    """Attribute node; its value is mirrored in a single Text child."""

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an attribute named qName with an empty Text child.

        localName is accepted but not stored; the local name is derived
        from nodeName by ``_get_localName``.
        """
        # skip setattr for performance
        d = self.__dict__
        d["nodeName"] = d["name"] = qName
        d["namespaceURI"] = namespaceURI
        d["prefix"] = prefix
        d['childNodes'] = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        """Return the part of nodeName after the first ':' (or all of it)."""
        return self.nodeName.split(":", 1)[-1]

    def _get_name(self):
        return self.name

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        """Keep the aliased attributes in sync on assignment.

        value/nodeValue are stored together and copied into the Text
        child; name/nodeName are stored together.  Either change calls
        _clear_id_cache on the owner element, if there is one.
        """
        d = self.__dict__
        if name in ("value", "nodeValue"):
            d["value"] = d["nodeValue"] = value
            d2 = self.childNodes[0].__dict__
            d2["data"] = d2["nodeValue"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        elif name in ("name", "nodeName"):
            d["name"] = d["nodeName"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        else:
            d[name] = value

    def _set_prefix(self, prefix):
        """Set the prefix and rebuild name/nodeName from it.

        Raises NamespaceErr when the prefix is "xmlns" but the
        attribute's namespace is set and is not XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        d = self.__dict__
        d['prefix'] = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        d['nodeName'] = d['name'] = newName

    def _set_value(self, value):
        """Store value as value/nodeValue and in the Text child's data."""
        d = self.__dict__
        d['value'] = d['nodeValue'] = value
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.childNodes[0].data = value

    def unlink(self):
        """Remove this attribute from its owner element and drop children."""
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        """Return True if flagged as an ID or declared as one.

        The declaration is looked up through
        ``ownerDocument._get_elem_info``; returns False when the
        attribute has no owner document or element, or no info exists.
        """
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        """Return the attribute's type info, or _no_type when unknown."""
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")


class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position index, or None if out of range."""
        try:
            # list() because dict.keys() is not indexable on Python 3.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def has_key(self, key):
        """Test a string key against names, any other key against NS pairs."""
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    __hash__ = None # Mutable type can't be correctly hashed
    def __cmp__(self, other):
        """Compare equal when both maps wrap the same ``_attrs`` dict."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    def __getitem__(self, attname_or_tuple):
        """Look up by qualified name, or by (namespaceURI, localName)."""
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or an Attr node.

        A string updates the existing attribute named attname or creates
        a new one.  Raises TypeError for any other non-Attr value.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute named name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute for (namespaceURI, localName), or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute named name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute for (namespaceURI, localName).

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store node under both indexes and return the node it replaced.

        Raises HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap


class TypeInfo(object):
    """Pair of (namespace, name) describing a schema type."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        if self.namespace:
            return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
        else:
            return "<TypeInfo %r>" % self.name

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

_no_type = TypeInfo(None, None)

class Element(Node):
    """Element node with attributes indexed by name and by NS pair."""

    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an element with no children and no attributes.

        localName is accepted but not stored.
        """
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.
660 661 def _get_localName(self): 662 return self.tagName.split(":", 1)[-1] 663 664 def _get_tagName(self): 665 return self.tagName 666 667 def unlink(self): 668 for attr in self._attrs.values(): 669 attr.unlink() 670 self._attrs = None 671 self._attrsNS = None 672 Node.unlink(self) 673 674 def getAttribute(self, attname): 675 try: 676 return self._attrs[attname].value 677 except KeyError: 678 return "" 679 680 def getAttributeNS(self, namespaceURI, localName): 681 try: 682 return self._attrsNS[(namespaceURI, localName)].value 683 except KeyError: 684 return "" 685 686 def setAttribute(self, attname, value): 687 attr = self.getAttributeNode(attname) 688 if attr is None: 689 attr = Attr(attname) 690 # for performance 691 d = attr.__dict__ 692 d["value"] = d["nodeValue"] = value 693 d["ownerDocument"] = self.ownerDocument 694 self.setAttributeNode(attr) 695 elif value != attr.value: 696 d = attr.__dict__ 697 d["value"] = d["nodeValue"] = value 698 if attr.isId: 699 _clear_id_cache(self) 700 701 def setAttributeNS(self, namespaceURI, qualifiedName, value): 702 prefix, localname = _nssplit(qualifiedName) 703 attr = self.getAttributeNodeNS(namespaceURI, localname) 704 if attr is None: 705 # for performance 706 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 707 d = attr.__dict__ 708 d["prefix"] = prefix 709 d["nodeName"] = qualifiedName 710 d["value"] = d["nodeValue"] = value 711 d["ownerDocument"] = self.ownerDocument 712 self.setAttributeNode(attr) 713 else: 714 d = attr.__dict__ 715 if value != attr.value: 716 d["value"] = d["nodeValue"] = value 717 if attr.isId: 718 _clear_id_cache(self) 719 if attr.prefix != prefix: 720 d["prefix"] = prefix 721 d["nodeName"] = qualifiedName 722 723 def getAttributeNode(self, attrname): 724 return self._attrs.get(attrname) 725 726 def getAttributeNodeNS(self, namespaceURI, localName): 727 return self._attrsNS.get((namespaceURI, localName)) 728 729 def setAttributeNode(self, attr): 730 if attr.ownerElement not in (None, 
self): 731 raise xml.dom.InuseAttributeErr("attribute node already owned") 732 old1 = self._attrs.get(attr.name, None) 733 if old1 is not None: 734 self.removeAttributeNode(old1) 735 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 736 if old2 is not None and old2 is not old1: 737 self.removeAttributeNode(old2) 738 _set_attribute_node(self, attr) 739 740 if old1 is not attr: 741 # It might have already been part of this node, in which case 742 # it doesn't represent a change, and should not be returned. 743 return old1 744 if old2 is not attr: 745 return old2 746 747 setAttributeNodeNS = setAttributeNode 748 749 def removeAttribute(self, name): 750 try: 751 attr = self._attrs[name] 752 except KeyError: 753 raise xml.dom.NotFoundErr() 754 self.removeAttributeNode(attr) 755 756 def removeAttributeNS(self, namespaceURI, localName): 757 try: 758 attr = self._attrsNS[(namespaceURI, localName)] 759 except KeyError: 760 raise xml.dom.NotFoundErr() 761 self.removeAttributeNode(attr) 762 763 def removeAttributeNode(self, node): 764 if node is None: 765 raise xml.dom.NotFoundErr() 766 try: 767 self._attrs[node.name] 768 except KeyError: 769 raise xml.dom.NotFoundErr() 770 _clear_id_cache(self) 771 node.unlink() 772 # Restore this since the node is still useful and otherwise 773 # unlinked 774 node.ownerDocument = self.ownerDocument 775 776 removeAttributeNodeNS = removeAttributeNode 777 778 def hasAttribute(self, name): 779 return name in self._attrs 780 781 def hasAttributeNS(self, namespaceURI, localName): 782 return (namespaceURI, localName) in self._attrsNS 783 784 def getElementsByTagName(self, name): 785 return _get_elements_by_tagName_helper(self, name, NodeList()) 786 787 def getElementsByTagNameNS(self, namespaceURI, localName): 788 return _get_elements_by_tagName_ns_helper( 789 self, namespaceURI, localName, NodeList()) 790 791 def __repr__(self): 792 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 793 794 def writexml(self, writer, 
indent="", addindent="", newl=""): 795 # indent = current indentation 796 # addindent = indentation to add to higher levels 797 # newl = newline string 798 writer.write(indent+"<" + self.tagName) 799 800 attrs = self._get_attributes() 801 a_names = attrs.keys() 802 a_names.sort() 803 804 for a_name in a_names: 805 writer.write(" %s=\"" % a_name) 806 _write_data(writer, attrs[a_name].value) 807 writer.write("\"") 808 if self.childNodes: 809 writer.write(">%s"%(newl)) 810 for node in self.childNodes: 811 node.writexml(writer,indent+addindent,addindent,newl) 812 writer.write("%s</%s>%s" % (indent,self.tagName,newl)) 813 else: 814 writer.write("/>%s"%(newl)) 815 816 def _get_attributes(self): 817 return NamedNodeMap(self._attrs, self._attrsNS, self) 818 819 def hasAttributes(self): 820 if self._attrs: 821 return True 822 else: 823 return False 824 825 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 826 827 def setIdAttribute(self, name): 828 idAttr = self.getAttributeNode(name) 829 self.setIdAttributeNode(idAttr) 830 831 def setIdAttributeNS(self, namespaceURI, localName): 832 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 833 self.setIdAttributeNode(idAttr) 834 835 def setIdAttributeNode(self, idAttr): 836 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 837 raise xml.dom.NotFoundErr() 838 if _get_containing_entref(self) is not None: 839 raise xml.dom.NoModificationAllowedErr() 840 if not idAttr._is_id: 841 idAttr.__dict__['_is_id'] = True 842 self._magic_id_nodes += 1 843 self.ownerDocument._magic_id_count += 1 844 _clear_id_cache(self) 845 846defproperty(Element, "attributes", 847 doc="NamedNodeMap of attributes on the element.") 848defproperty(Element, "localName", 849 doc="Namespace-local name of this element.") 850 851 852def _set_attribute_node(element, attr): 853 _clear_id_cache(element) 854 element._attrs[attr.name] = attr 855 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 856 857 # This creates a circular 
reference, but Element.unlink() 858 # breaks the cycle since the references to the attribute 859 # dictionaries are tossed. 860 attr.__dict__['ownerElement'] = element 861 862 863class Childless: 864 """Mixin that makes childless-ness easy to implement and avoids 865 the complexity of the Node methods that deal with children. 866 """ 867 868 attributes = None 869 childNodes = EmptyNodeList() 870 firstChild = None 871 lastChild = None 872 873 def _get_firstChild(self): 874 return None 875 876 def _get_lastChild(self): 877 return None 878 879 def appendChild(self, node): 880 raise xml.dom.HierarchyRequestErr( 881 self.nodeName + " nodes cannot have children") 882 883 def hasChildNodes(self): 884 return False 885 886 def insertBefore(self, newChild, refChild): 887 raise xml.dom.HierarchyRequestErr( 888 self.nodeName + " nodes do not have children") 889 890 def removeChild(self, oldChild): 891 raise xml.dom.NotFoundErr( 892 self.nodeName + " nodes do not have children") 893 894 def normalize(self): 895 # For childless nodes, normalize() has nothing to do. 
896 pass 897 898 def replaceChild(self, newChild, oldChild): 899 raise xml.dom.HierarchyRequestErr( 900 self.nodeName + " nodes do not have children") 901 902 903class ProcessingInstruction(Childless, Node): 904 nodeType = Node.PROCESSING_INSTRUCTION_NODE 905 906 def __init__(self, target, data): 907 self.target = self.nodeName = target 908 self.data = self.nodeValue = data 909 910 def _get_data(self): 911 return self.data 912 def _set_data(self, value): 913 d = self.__dict__ 914 d['data'] = d['nodeValue'] = value 915 916 def _get_target(self): 917 return self.target 918 def _set_target(self, value): 919 d = self.__dict__ 920 d['target'] = d['nodeName'] = value 921 922 def __setattr__(self, name, value): 923 if name == "data" or name == "nodeValue": 924 self.__dict__['data'] = self.__dict__['nodeValue'] = value 925 elif name == "target" or name == "nodeName": 926 self.__dict__['target'] = self.__dict__['nodeName'] = value 927 else: 928 self.__dict__[name] = value 929 930 def writexml(self, writer, indent="", addindent="", newl=""): 931 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 932 933 934class CharacterData(Childless, Node): 935 def _get_length(self): 936 return len(self.data) 937 __len__ = _get_length 938 939 def _get_data(self): 940 return self.__dict__['data'] 941 def _set_data(self, data): 942 d = self.__dict__ 943 d['data'] = d['nodeValue'] = data 944 945 _get_nodeValue = _get_data 946 _set_nodeValue = _set_data 947 948 def __setattr__(self, name, value): 949 if name == "data" or name == "nodeValue": 950 self.__dict__['data'] = self.__dict__['nodeValue'] = value 951 else: 952 self.__dict__[name] = value 953 954 def __repr__(self): 955 data = self.data 956 if len(data) > 10: 957 dotdotdot = "..." 
958 else: 959 dotdotdot = "" 960 return '<DOM %s node "%r%s">' % ( 961 self.__class__.__name__, data[0:10], dotdotdot) 962 963 def substringData(self, offset, count): 964 if offset < 0: 965 raise xml.dom.IndexSizeErr("offset cannot be negative") 966 if offset >= len(self.data): 967 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 968 if count < 0: 969 raise xml.dom.IndexSizeErr("count cannot be negative") 970 return self.data[offset:offset+count] 971 972 def appendData(self, arg): 973 self.data = self.data + arg 974 975 def insertData(self, offset, arg): 976 if offset < 0: 977 raise xml.dom.IndexSizeErr("offset cannot be negative") 978 if offset >= len(self.data): 979 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 980 if arg: 981 self.data = "%s%s%s" % ( 982 self.data[:offset], arg, self.data[offset:]) 983 984 def deleteData(self, offset, count): 985 if offset < 0: 986 raise xml.dom.IndexSizeErr("offset cannot be negative") 987 if offset >= len(self.data): 988 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 989 if count < 0: 990 raise xml.dom.IndexSizeErr("count cannot be negative") 991 if count: 992 self.data = self.data[:offset] + self.data[offset+count:] 993 994 def replaceData(self, offset, count, arg): 995 if offset < 0: 996 raise xml.dom.IndexSizeErr("offset cannot be negative") 997 if offset >= len(self.data): 998 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 999 if count < 0: 1000 raise xml.dom.IndexSizeErr("count cannot be negative") 1001 if count: 1002 self.data = "%s%s%s" % ( 1003 self.data[:offset], arg, self.data[offset+count:]) 1004 1005defproperty(CharacterData, "length", doc="Length of the string data.") 1006 1007 1008class Text(CharacterData): 1009 # Make sure we don't add an instance __dict__ if we don't already 1010 # have one, at least when that's possible: 1011 # XXX this does not work, CharacterData is an old-style class 1012 # __slots__ = () 1013 1014 nodeType = 
Node.TEXT_NODE 1015 nodeName = "#text" 1016 attributes = None 1017 1018 def splitText(self, offset): 1019 if offset < 0 or offset > len(self.data): 1020 raise xml.dom.IndexSizeErr("illegal offset value") 1021 newText = self.__class__() 1022 newText.data = self.data[offset:] 1023 newText.ownerDocument = self.ownerDocument 1024 next = self.nextSibling 1025 if self.parentNode and self in self.parentNode.childNodes: 1026 if next is None: 1027 self.parentNode.appendChild(newText) 1028 else: 1029 self.parentNode.insertBefore(newText, next) 1030 self.data = self.data[:offset] 1031 return newText 1032 1033 def writexml(self, writer, indent="", addindent="", newl=""): 1034 _write_data(writer, "%s%s%s"%(indent, self.data, newl)) 1035 1036 # DOM Level 3 (WD 9 April 2002) 1037 1038 def _get_wholeText(self): 1039 L = [self.data] 1040 n = self.previousSibling 1041 while n is not None: 1042 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1043 L.insert(0, n.data) 1044 n = n.previousSibling 1045 else: 1046 break 1047 n = self.nextSibling 1048 while n is not None: 1049 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1050 L.append(n.data) 1051 n = n.nextSibling 1052 else: 1053 break 1054 return ''.join(L) 1055 1056 def replaceWholeText(self, content): 1057 # XXX This needs to be seriously changed if minidom ever 1058 # supports EntityReference nodes. 
1059 parent = self.parentNode 1060 n = self.previousSibling 1061 while n is not None: 1062 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1063 next = n.previousSibling 1064 parent.removeChild(n) 1065 n = next 1066 else: 1067 break 1068 n = self.nextSibling 1069 if not content: 1070 parent.removeChild(self) 1071 while n is not None: 1072 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1073 next = n.nextSibling 1074 parent.removeChild(n) 1075 n = next 1076 else: 1077 break 1078 if content: 1079 d = self.__dict__ 1080 d['data'] = content 1081 d['nodeValue'] = content 1082 return self 1083 else: 1084 return None 1085 1086 def _get_isWhitespaceInElementContent(self): 1087 if self.data.strip(): 1088 return False 1089 elem = _get_containing_element(self) 1090 if elem is None: 1091 return False 1092 info = self.ownerDocument._get_elem_info(elem) 1093 if info is None: 1094 return False 1095 else: 1096 return info.isElementContent() 1097 1098defproperty(Text, "isWhitespaceInElementContent", 1099 doc="True iff this text node contains only whitespace" 1100 " and is in element content.") 1101defproperty(Text, "wholeText", 1102 doc="The text of all logically-adjacent text nodes.") 1103 1104 1105def _get_containing_element(node): 1106 c = node.parentNode 1107 while c is not None: 1108 if c.nodeType == Node.ELEMENT_NODE: 1109 return c 1110 c = c.parentNode 1111 return None 1112 1113def _get_containing_entref(node): 1114 c = node.parentNode 1115 while c is not None: 1116 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1117 return c 1118 c = c.parentNode 1119 return None 1120 1121 1122class Comment(Childless, CharacterData): 1123 nodeType = Node.COMMENT_NODE 1124 nodeName = "#comment" 1125 1126 def __init__(self, data): 1127 self.data = self.nodeValue = data 1128 1129 def writexml(self, writer, indent="", addindent="", newl=""): 1130 if "--" in self.data: 1131 raise ValueError("'--' is not allowed in a comment node") 1132 writer.write("%s<!--%s-->%s" % (indent, 
self.data, newl)) 1133 1134 1135class CDATASection(Text): 1136 # Make sure we don't add an instance __dict__ if we don't already 1137 # have one, at least when that's possible: 1138 # XXX this does not work, Text is an old-style class 1139 # __slots__ = () 1140 1141 nodeType = Node.CDATA_SECTION_NODE 1142 nodeName = "#cdata-section" 1143 1144 def writexml(self, writer, indent="", addindent="", newl=""): 1145 if self.data.find("]]>") >= 0: 1146 raise ValueError("']]>' not allowed in a CDATA section") 1147 writer.write("<![CDATA[%s]]>" % self.data) 1148 1149 1150class ReadOnlySequentialNamedNodeMap(object): 1151 __slots__ = '_seq', 1152 1153 def __init__(self, seq=()): 1154 # seq should be a list or tuple 1155 self._seq = seq 1156 1157 def __len__(self): 1158 return len(self._seq) 1159 1160 def _get_length(self): 1161 return len(self._seq) 1162 1163 def getNamedItem(self, name): 1164 for n in self._seq: 1165 if n.nodeName == name: 1166 return n 1167 1168 def getNamedItemNS(self, namespaceURI, localName): 1169 for n in self._seq: 1170 if n.namespaceURI == namespaceURI and n.localName == localName: 1171 return n 1172 1173 def __getitem__(self, name_or_tuple): 1174 if isinstance(name_or_tuple, tuple): 1175 node = self.getNamedItemNS(*name_or_tuple) 1176 else: 1177 node = self.getNamedItem(name_or_tuple) 1178 if node is None: 1179 raise KeyError, name_or_tuple 1180 return node 1181 1182 def item(self, index): 1183 if index < 0: 1184 return None 1185 try: 1186 return self._seq[index] 1187 except IndexError: 1188 return None 1189 1190 def removeNamedItem(self, name): 1191 raise xml.dom.NoModificationAllowedErr( 1192 "NamedNodeMap instance is read-only") 1193 1194 def removeNamedItemNS(self, namespaceURI, localName): 1195 raise xml.dom.NoModificationAllowedErr( 1196 "NamedNodeMap instance is read-only") 1197 1198 def setNamedItem(self, node): 1199 raise xml.dom.NoModificationAllowedErr( 1200 "NamedNodeMap instance is read-only") 1201 1202 def setNamedItemNS(self, node): 
1203 raise xml.dom.NoModificationAllowedErr( 1204 "NamedNodeMap instance is read-only") 1205 1206 def __getstate__(self): 1207 return [self._seq] 1208 1209 def __setstate__(self, state): 1210 self._seq = state[0] 1211 1212defproperty(ReadOnlySequentialNamedNodeMap, "length", 1213 doc="Number of entries in the NamedNodeMap.") 1214 1215 1216class Identified: 1217 """Mix-in class that supports the publicId and systemId attributes.""" 1218 1219 # XXX this does not work, this is an old-style class 1220 # __slots__ = 'publicId', 'systemId' 1221 1222 def _identified_mixin_init(self, publicId, systemId): 1223 self.publicId = publicId 1224 self.systemId = systemId 1225 1226 def _get_publicId(self): 1227 return self.publicId 1228 1229 def _get_systemId(self): 1230 return self.systemId 1231 1232class DocumentType(Identified, Childless, Node): 1233 nodeType = Node.DOCUMENT_TYPE_NODE 1234 nodeValue = None 1235 name = None 1236 publicId = None 1237 systemId = None 1238 internalSubset = None 1239 1240 def __init__(self, qualifiedName): 1241 self.entities = ReadOnlySequentialNamedNodeMap() 1242 self.notations = ReadOnlySequentialNamedNodeMap() 1243 if qualifiedName: 1244 prefix, localname = _nssplit(qualifiedName) 1245 self.name = localname 1246 self.nodeName = self.name 1247 1248 def _get_internalSubset(self): 1249 return self.internalSubset 1250 1251 def cloneNode(self, deep): 1252 if self.ownerDocument is None: 1253 # it's ok 1254 clone = DocumentType(None) 1255 clone.name = self.name 1256 clone.nodeName = self.name 1257 operation = xml.dom.UserDataHandler.NODE_CLONED 1258 if deep: 1259 clone.entities._seq = [] 1260 clone.notations._seq = [] 1261 for n in self.notations._seq: 1262 notation = Notation(n.nodeName, n.publicId, n.systemId) 1263 clone.notations._seq.append(notation) 1264 n._call_user_data_handler(operation, n, notation) 1265 for e in self.entities._seq: 1266 entity = Entity(e.nodeName, e.publicId, e.systemId, 1267 e.notationName) 1268 entity.actualEncoding = 
e.actualEncoding 1269 entity.encoding = e.encoding 1270 entity.version = e.version 1271 clone.entities._seq.append(entity) 1272 e._call_user_data_handler(operation, n, entity) 1273 self._call_user_data_handler(operation, self, clone) 1274 return clone 1275 else: 1276 return None 1277 1278 def writexml(self, writer, indent="", addindent="", newl=""): 1279 writer.write("<!DOCTYPE ") 1280 writer.write(self.name) 1281 if self.publicId: 1282 writer.write("%s PUBLIC '%s'%s '%s'" 1283 % (newl, self.publicId, newl, self.systemId)) 1284 elif self.systemId: 1285 writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) 1286 if self.internalSubset is not None: 1287 writer.write(" [") 1288 writer.write(self.internalSubset) 1289 writer.write("]") 1290 writer.write(">"+newl) 1291 1292class Entity(Identified, Node): 1293 attributes = None 1294 nodeType = Node.ENTITY_NODE 1295 nodeValue = None 1296 1297 actualEncoding = None 1298 encoding = None 1299 version = None 1300 1301 def __init__(self, name, publicId, systemId, notation): 1302 self.nodeName = name 1303 self.notationName = notation 1304 self.childNodes = NodeList() 1305 self._identified_mixin_init(publicId, systemId) 1306 1307 def _get_actualEncoding(self): 1308 return self.actualEncoding 1309 1310 def _get_encoding(self): 1311 return self.encoding 1312 1313 def _get_version(self): 1314 return self.version 1315 1316 def appendChild(self, newChild): 1317 raise xml.dom.HierarchyRequestErr( 1318 "cannot append children to an entity node") 1319 1320 def insertBefore(self, newChild, refChild): 1321 raise xml.dom.HierarchyRequestErr( 1322 "cannot insert children below an entity node") 1323 1324 def removeChild(self, oldChild): 1325 raise xml.dom.HierarchyRequestErr( 1326 "cannot remove children from an entity node") 1327 1328 def replaceChild(self, newChild, oldChild): 1329 raise xml.dom.HierarchyRequestErr( 1330 "cannot replace children of an entity node") 1331 1332class Notation(Identified, Childless, Node): 1333 nodeType = 
Node.NOTATION_NODE 1334 nodeValue = None 1335 1336 def __init__(self, name, publicId, systemId): 1337 self.nodeName = name 1338 self._identified_mixin_init(publicId, systemId) 1339 1340 1341class DOMImplementation(DOMImplementationLS): 1342 _features = [("core", "1.0"), 1343 ("core", "2.0"), 1344 ("core", None), 1345 ("xml", "1.0"), 1346 ("xml", "2.0"), 1347 ("xml", None), 1348 ("ls-load", "3.0"), 1349 ("ls-load", None), 1350 ] 1351 1352 def hasFeature(self, feature, version): 1353 if version == "": 1354 version = None 1355 return (feature.lower(), version) in self._features 1356 1357 def createDocument(self, namespaceURI, qualifiedName, doctype): 1358 if doctype and doctype.parentNode is not None: 1359 raise xml.dom.WrongDocumentErr( 1360 "doctype object owned by another DOM tree") 1361 doc = self._create_document() 1362 1363 add_root_element = not (namespaceURI is None 1364 and qualifiedName is None 1365 and doctype is None) 1366 1367 if not qualifiedName and add_root_element: 1368 # The spec is unclear what to raise here; SyntaxErr 1369 # would be the other obvious candidate. Since Xerces raises 1370 # InvalidCharacterErr, and since SyntaxErr is not listed 1371 # for createDocument, that seems to be the better choice. 1372 # XXX: need to check for illegal characters here and in 1373 # createElement. 1374 1375 # DOM Level III clears this up when talking about the return value 1376 # of this function. 
If namespaceURI, qName and DocType are 1377 # Null the document is returned without a document element 1378 # Otherwise if doctype or namespaceURI are not None 1379 # Then we go back to the above problem 1380 raise xml.dom.InvalidCharacterErr("Element with no name") 1381 1382 if add_root_element: 1383 prefix, localname = _nssplit(qualifiedName) 1384 if prefix == "xml" \ 1385 and namespaceURI != "http://www.w3.org/XML/1998/namespace": 1386 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") 1387 if prefix and not namespaceURI: 1388 raise xml.dom.NamespaceErr( 1389 "illegal use of prefix without namespaces") 1390 element = doc.createElementNS(namespaceURI, qualifiedName) 1391 if doctype: 1392 doc.appendChild(doctype) 1393 doc.appendChild(element) 1394 1395 if doctype: 1396 doctype.parentNode = doctype.ownerDocument = doc 1397 1398 doc.doctype = doctype 1399 doc.implementation = self 1400 return doc 1401 1402 def createDocumentType(self, qualifiedName, publicId, systemId): 1403 doctype = DocumentType(qualifiedName) 1404 doctype.publicId = publicId 1405 doctype.systemId = systemId 1406 return doctype 1407 1408 # DOM Level 3 (WD 9 April 2002) 1409 1410 def getInterface(self, feature): 1411 if self.hasFeature(feature, None): 1412 return self 1413 else: 1414 return None 1415 1416 # internal 1417 def _create_document(self): 1418 return Document() 1419 1420class ElementInfo(object): 1421 """Object that represents content-model information for an element. 1422 1423 This implementation is not expected to be used in practice; DOM 1424 builders should provide implementations which do the right thing 1425 using information available to it. 
1426 1427 """ 1428 1429 __slots__ = 'tagName', 1430 1431 def __init__(self, name): 1432 self.tagName = name 1433 1434 def getAttributeType(self, aname): 1435 return _no_type 1436 1437 def getAttributeTypeNS(self, namespaceURI, localName): 1438 return _no_type 1439 1440 def isElementContent(self): 1441 return False 1442 1443 def isEmpty(self): 1444 """Returns true iff this element is declared to have an EMPTY 1445 content model.""" 1446 return False 1447 1448 def isId(self, aname): 1449 """Returns true iff the named attribute is a DTD-style ID.""" 1450 return False 1451 1452 def isIdNS(self, namespaceURI, localName): 1453 """Returns true iff the identified attribute is a DTD-style ID.""" 1454 return False 1455 1456 def __getstate__(self): 1457 return self.tagName 1458 1459 def __setstate__(self, state): 1460 self.tagName = state 1461 1462def _clear_id_cache(node): 1463 if node.nodeType == Node.DOCUMENT_NODE: 1464 node._id_cache.clear() 1465 node._id_search_stack = None 1466 elif _in_document(node): 1467 node.ownerDocument._id_cache.clear() 1468 node.ownerDocument._id_search_stack= None 1469 1470class Document(Node, DocumentLS): 1471 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1472 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1473 1474 nodeType = Node.DOCUMENT_NODE 1475 nodeName = "#document" 1476 nodeValue = None 1477 attributes = None 1478 doctype = None 1479 parentNode = None 1480 previousSibling = nextSibling = None 1481 1482 implementation = DOMImplementation() 1483 1484 # Document attributes from Level 3 (WD 9 April 2002) 1485 1486 actualEncoding = None 1487 encoding = None 1488 standalone = None 1489 version = None 1490 strictErrorChecking = False 1491 errorHandler = None 1492 documentURI = None 1493 1494 _magic_id_count = 0 1495 1496 def __init__(self): 1497 self.childNodes = NodeList() 1498 # mapping of (namespaceURI, localName) -> ElementInfo 1499 # and tagName -> ElementInfo 1500 self._elem_info = {} 1501 self._id_cache = {} 
1502 self._id_search_stack = None 1503 1504 def _get_elem_info(self, element): 1505 if element.namespaceURI: 1506 key = element.namespaceURI, element.localName 1507 else: 1508 key = element.tagName 1509 return self._elem_info.get(key) 1510 1511 def _get_actualEncoding(self): 1512 return self.actualEncoding 1513 1514 def _get_doctype(self): 1515 return self.doctype 1516 1517 def _get_documentURI(self): 1518 return self.documentURI 1519 1520 def _get_encoding(self): 1521 return self.encoding 1522 1523 def _get_errorHandler(self): 1524 return self.errorHandler 1525 1526 def _get_standalone(self): 1527 return self.standalone 1528 1529 def _get_strictErrorChecking(self): 1530 return self.strictErrorChecking 1531 1532 def _get_version(self): 1533 return self.version 1534 1535 def appendChild(self, node): 1536 if node.nodeType not in self._child_node_types: 1537 raise xml.dom.HierarchyRequestErr( 1538 "%s cannot be child of %s" % (repr(node), repr(self))) 1539 if node.parentNode is not None: 1540 # This needs to be done before the next test since this 1541 # may *be* the document element, in which case it should 1542 # end up re-ordered to the end. 
1543 node.parentNode.removeChild(node) 1544 1545 if node.nodeType == Node.ELEMENT_NODE \ 1546 and self._get_documentElement(): 1547 raise xml.dom.HierarchyRequestErr( 1548 "two document elements disallowed") 1549 return Node.appendChild(self, node) 1550 1551 def removeChild(self, oldChild): 1552 try: 1553 self.childNodes.remove(oldChild) 1554 except ValueError: 1555 raise xml.dom.NotFoundErr() 1556 oldChild.nextSibling = oldChild.previousSibling = None 1557 oldChild.parentNode = None 1558 if self.documentElement is oldChild: 1559 self.documentElement = None 1560 1561 return oldChild 1562 1563 def _get_documentElement(self): 1564 for node in self.childNodes: 1565 if node.nodeType == Node.ELEMENT_NODE: 1566 return node 1567 1568 def unlink(self): 1569 if self.doctype is not None: 1570 self.doctype.unlink() 1571 self.doctype = None 1572 Node.unlink(self) 1573 1574 def cloneNode(self, deep): 1575 if not deep: 1576 return None 1577 clone = self.implementation.createDocument(None, None, None) 1578 clone.encoding = self.encoding 1579 clone.standalone = self.standalone 1580 clone.version = self.version 1581 for n in self.childNodes: 1582 childclone = _clone_node(n, deep, clone) 1583 assert childclone.ownerDocument.isSameNode(clone) 1584 clone.childNodes.append(childclone) 1585 if childclone.nodeType == Node.DOCUMENT_NODE: 1586 assert clone.documentElement is None 1587 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: 1588 assert clone.doctype is None 1589 clone.doctype = childclone 1590 childclone.parentNode = clone 1591 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, 1592 self, clone) 1593 return clone 1594 1595 def createDocumentFragment(self): 1596 d = DocumentFragment() 1597 d.ownerDocument = self 1598 return d 1599 1600 def createElement(self, tagName): 1601 e = Element(tagName) 1602 e.ownerDocument = self 1603 return e 1604 1605 def createTextNode(self, data): 1606 if not isinstance(data, StringTypes): 1607 raise TypeError, "node contents must be 
a string" 1608 t = Text() 1609 t.data = data 1610 t.ownerDocument = self 1611 return t 1612 1613 def createCDATASection(self, data): 1614 if not isinstance(data, StringTypes): 1615 raise TypeError, "node contents must be a string" 1616 c = CDATASection() 1617 c.data = data 1618 c.ownerDocument = self 1619 return c 1620 1621 def createComment(self, data): 1622 c = Comment(data) 1623 c.ownerDocument = self 1624 return c 1625 1626 def createProcessingInstruction(self, target, data): 1627 p = ProcessingInstruction(target, data) 1628 p.ownerDocument = self 1629 return p 1630 1631 def createAttribute(self, qName): 1632 a = Attr(qName) 1633 a.ownerDocument = self 1634 a.value = "" 1635 return a 1636 1637 def createElementNS(self, namespaceURI, qualifiedName): 1638 prefix, localName = _nssplit(qualifiedName) 1639 e = Element(qualifiedName, namespaceURI, prefix) 1640 e.ownerDocument = self 1641 return e 1642 1643 def createAttributeNS(self, namespaceURI, qualifiedName): 1644 prefix, localName = _nssplit(qualifiedName) 1645 a = Attr(qualifiedName, namespaceURI, localName, prefix) 1646 a.ownerDocument = self 1647 a.value = "" 1648 return a 1649 1650 # A couple of implementation-specific helpers to create node types 1651 # not supported by the W3C DOM specs: 1652 1653 def _create_entity(self, name, publicId, systemId, notationName): 1654 e = Entity(name, publicId, systemId, notationName) 1655 e.ownerDocument = self 1656 return e 1657 1658 def _create_notation(self, name, publicId, systemId): 1659 n = Notation(name, publicId, systemId) 1660 n.ownerDocument = self 1661 return n 1662 1663 def getElementById(self, id): 1664 if id in self._id_cache: 1665 return self._id_cache[id] 1666 if not (self._elem_info or self._magic_id_count): 1667 return None 1668 1669 stack = self._id_search_stack 1670 if stack is None: 1671 # we never searched before, or the cache has been cleared 1672 stack = [self.documentElement] 1673 self._id_search_stack = stack 1674 elif not stack: 1675 # Previous 
search was completed and cache is still valid; 1676 # no matching node. 1677 return None 1678 1679 result = None 1680 while stack: 1681 node = stack.pop() 1682 # add child elements to stack for continued searching 1683 stack.extend([child for child in node.childNodes 1684 if child.nodeType in _nodeTypes_with_children]) 1685 # check this node 1686 info = self._get_elem_info(node) 1687 if info: 1688 # We have to process all ID attributes before 1689 # returning in order to get all the attributes set to 1690 # be IDs using Element.setIdAttribute*(). 1691 for attr in node.attributes.values(): 1692 if attr.namespaceURI: 1693 if info.isIdNS(attr.namespaceURI, attr.localName): 1694 self._id_cache[attr.value] = node 1695 if attr.value == id: 1696 result = node 1697 elif not node._magic_id_nodes: 1698 break 1699 elif info.isId(attr.name): 1700 self._id_cache[attr.value] = node 1701 if attr.value == id: 1702 result = node 1703 elif not node._magic_id_nodes: 1704 break 1705 elif attr._is_id: 1706 self._id_cache[attr.value] = node 1707 if attr.value == id: 1708 result = node 1709 elif node._magic_id_nodes == 1: 1710 break 1711 elif node._magic_id_nodes: 1712 for attr in node.attributes.values(): 1713 if attr._is_id: 1714 self._id_cache[attr.value] = node 1715 if attr.value == id: 1716 result = node 1717 if result is not None: 1718 break 1719 return result 1720 1721 def getElementsByTagName(self, name): 1722 return _get_elements_by_tagName_helper(self, name, NodeList()) 1723 1724 def getElementsByTagNameNS(self, namespaceURI, localName): 1725 return _get_elements_by_tagName_ns_helper( 1726 self, namespaceURI, localName, NodeList()) 1727 1728 def isSupported(self, feature, version): 1729 return self.implementation.hasFeature(feature, version) 1730 1731 def importNode(self, node, deep): 1732 if node.nodeType == Node.DOCUMENT_NODE: 1733 raise xml.dom.NotSupportedErr("cannot import document nodes") 1734 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1735 raise 
xml.dom.NotSupportedErr("cannot import document type nodes") 1736 return _clone_node(node, deep, self) 1737 1738 def writexml(self, writer, indent="", addindent="", newl="", 1739 encoding = None): 1740 if encoding is None: 1741 writer.write('<?xml version="1.0" ?>'+newl) 1742 else: 1743 writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) 1744 for node in self.childNodes: 1745 node.writexml(writer, indent, addindent, newl) 1746 1747 # DOM Level 3 (WD 9 April 2002) 1748 1749 def renameNode(self, n, namespaceURI, name): 1750 if n.ownerDocument is not self: 1751 raise xml.dom.WrongDocumentErr( 1752 "cannot rename nodes from other documents;\n" 1753 "expected %s,\nfound %s" % (self, n.ownerDocument)) 1754 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): 1755 raise xml.dom.NotSupportedErr( 1756 "renameNode() only applies to element and attribute nodes") 1757 if namespaceURI != EMPTY_NAMESPACE: 1758 if ':' in name: 1759 prefix, localName = name.split(':', 1) 1760 if ( prefix == "xmlns" 1761 and namespaceURI != xml.dom.XMLNS_NAMESPACE): 1762 raise xml.dom.NamespaceErr( 1763 "illegal use of 'xmlns' prefix") 1764 else: 1765 if ( name == "xmlns" 1766 and namespaceURI != xml.dom.XMLNS_NAMESPACE 1767 and n.nodeType == Node.ATTRIBUTE_NODE): 1768 raise xml.dom.NamespaceErr( 1769 "illegal use of the 'xmlns' attribute") 1770 prefix = None 1771 localName = name 1772 else: 1773 prefix = None 1774 localName = None 1775 if n.nodeType == Node.ATTRIBUTE_NODE: 1776 element = n.ownerElement 1777 if element is not None: 1778 is_id = n._is_id 1779 element.removeAttributeNode(n) 1780 else: 1781 element = None 1782 # avoid __setattr__ 1783 d = n.__dict__ 1784 d['prefix'] = prefix 1785 d['localName'] = localName 1786 d['namespaceURI'] = namespaceURI 1787 d['nodeName'] = name 1788 if n.nodeType == Node.ELEMENT_NODE: 1789 d['tagName'] = name 1790 else: 1791 # attribute node 1792 d['name'] = name 1793 if element is not None: 1794 element.setAttributeNode(n) 1795 
if is_id: 1796 element.setIdAttributeNode(n) 1797 # It's not clear from a semantic perspective whether we should 1798 # call the user data handlers for the NODE_RENAMED event since 1799 # we're re-using the existing node. The draft spec has been 1800 # interpreted as meaning "no, don't call the handler unless a 1801 # new node is created." 1802 return n 1803 1804defproperty(Document, "documentElement", 1805 doc="Top-level element of this document.") 1806 1807 1808def _clone_node(node, deep, newOwnerDocument): 1809 """ 1810 Clone a node and give it the new owner document. 1811 Called by Node.cloneNode and Document.importNode 1812 """ 1813 if node.ownerDocument.isSameNode(newOwnerDocument): 1814 operation = xml.dom.UserDataHandler.NODE_CLONED 1815 else: 1816 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1817 if node.nodeType == Node.ELEMENT_NODE: 1818 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1819 node.nodeName) 1820 for attr in node.attributes.values(): 1821 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1822 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1823 a.specified = attr.specified 1824 1825 if deep: 1826 for child in node.childNodes: 1827 c = _clone_node(child, deep, newOwnerDocument) 1828 clone.appendChild(c) 1829 1830 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1831 clone = newOwnerDocument.createDocumentFragment() 1832 if deep: 1833 for child in node.childNodes: 1834 c = _clone_node(child, deep, newOwnerDocument) 1835 clone.appendChild(c) 1836 1837 elif node.nodeType == Node.TEXT_NODE: 1838 clone = newOwnerDocument.createTextNode(node.data) 1839 elif node.nodeType == Node.CDATA_SECTION_NODE: 1840 clone = newOwnerDocument.createCDATASection(node.data) 1841 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1842 clone = newOwnerDocument.createProcessingInstruction(node.target, 1843 node.data) 1844 elif node.nodeType == Node.COMMENT_NODE: 1845 clone = 
newOwnerDocument.createComment(node.data) 1846 elif node.nodeType == Node.ATTRIBUTE_NODE: 1847 clone = newOwnerDocument.createAttributeNS(node.namespaceURI, 1848 node.nodeName) 1849 clone.specified = True 1850 clone.value = node.value 1851 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1852 assert node.ownerDocument is not newOwnerDocument 1853 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1854 clone = newOwnerDocument.implementation.createDocumentType( 1855 node.name, node.publicId, node.systemId) 1856 clone.ownerDocument = newOwnerDocument 1857 if deep: 1858 clone.entities._seq = [] 1859 clone.notations._seq = [] 1860 for n in node.notations._seq: 1861 notation = Notation(n.nodeName, n.publicId, n.systemId) 1862 notation.ownerDocument = newOwnerDocument 1863 clone.notations._seq.append(notation) 1864 if hasattr(n, '_call_user_data_handler'): 1865 n._call_user_data_handler(operation, n, notation) 1866 for e in node.entities._seq: 1867 entity = Entity(e.nodeName, e.publicId, e.systemId, 1868 e.notationName) 1869 entity.actualEncoding = e.actualEncoding 1870 entity.encoding = e.encoding 1871 entity.version = e.version 1872 entity.ownerDocument = newOwnerDocument 1873 clone.entities._seq.append(entity) 1874 if hasattr(e, '_call_user_data_handler'): 1875 e._call_user_data_handler(operation, n, entity) 1876 else: 1877 # Note the cloning of Document and DocumentType nodes is 1878 # implementation specific. minidom handles those cases 1879 # directly in the cloneNode() methods. 1880 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1881 1882 # Check for _call_user_data_handler() since this could conceivably 1883 # used with other DOM implementations (one of the FourThought 1884 # DOMs, perhaps?). 
1885 if hasattr(node, '_call_user_data_handler'): 1886 node._call_user_data_handler(operation, node, clone) 1887 return clone 1888 1889 1890def _nssplit(qualifiedName): 1891 fields = qualifiedName.split(':', 1) 1892 if len(fields) == 2: 1893 return fields 1894 else: 1895 return (None, fields[0]) 1896 1897 1898def _get_StringIO(): 1899 # we can't use cStringIO since it doesn't support Unicode strings 1900 from StringIO import StringIO 1901 return StringIO() 1902 1903def _do_pulldom_parse(func, args, kwargs): 1904 events = func(*args, **kwargs) 1905 toktype, rootNode = events.getEvent() 1906 events.expandNode(rootNode) 1907 events.clear() 1908 return rootNode 1909 1910def parse(file, parser=None, bufsize=None): 1911 """Parse a file into a DOM by filename or file object.""" 1912 if parser is None and not bufsize: 1913 from xml.dom import expatbuilder 1914 return expatbuilder.parse(file) 1915 else: 1916 from xml.dom import pulldom 1917 return _do_pulldom_parse(pulldom.parse, (file,), 1918 {'parser': parser, 'bufsize': bufsize}) 1919 1920def parseString(string, parser=None): 1921 """Parse a file into a DOM from a string.""" 1922 if parser is None: 1923 from xml.dom import expatbuilder 1924 return expatbuilder.parseString(string) 1925 else: 1926 from xml.dom import pulldom 1927 return _do_pulldom_parse(pulldom.parseString, (string,), 1928 {'parser': parser}) 1929 1930def getDOMImplementation(features=None): 1931 if features: 1932 if isinstance(features, StringTypes): 1933 features = domreg._parse_feature_string(features) 1934 for f, v in features: 1935 if not Document.implementation.hasFeature(f, v): 1936 return None 1937 return Document.implementation 1938