1/* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18/* 19 * $Id: DOMHelper.java 468655 2006-10-28 07:12:06Z minchau $ 20 */ 21package org.apache.xml.utils; 22 23import java.util.Hashtable; 24import java.util.Vector; 25 26import javax.xml.XMLConstants; 27import javax.xml.parsers.DocumentBuilder; 28import javax.xml.parsers.DocumentBuilderFactory; 29import javax.xml.parsers.ParserConfigurationException; 30 31import org.apache.xml.dtm.ref.DTMNodeProxy; 32import org.apache.xml.res.XMLErrorResources; 33import org.apache.xml.res.XMLMessages; 34 35import org.w3c.dom.Attr; 36import org.w3c.dom.DOMImplementation; 37import org.w3c.dom.Document; 38import org.w3c.dom.DocumentType; 39import org.w3c.dom.Element; 40import org.w3c.dom.Entity; 41import org.w3c.dom.NamedNodeMap; 42import org.w3c.dom.Node; 43import org.w3c.dom.Text; 44 45/** 46 * @deprecated Since the introduction of the DTM, this class will be removed. 47 * This class provides a front-end to DOM implementations, providing 48 * a number of utility functions that either aren't yet standardized 49 * by the DOM spec or that are defined in optional DOM modules and 50 * hence may not be present in all DOMs. 
51 */ 52public class DOMHelper 53{ 54 55 /** 56 * DOM Level 1 did not have a standard mechanism for creating a new 57 * Document object. This function provides a DOM-implementation-independent 58 * abstraction for that for that concept. It's typically used when 59 * outputting a new DOM as the result of an operation. 60 * <p> 61 * TODO: This isn't directly compatable with DOM Level 2. 62 * The Level 2 createDocument call also creates the root 63 * element, and thus requires that you know what that element will be 64 * before creating the Document. We should think about whether we want 65 * to change this code, and the callers, so we can use the DOM's own 66 * method. (It's also possible that DOM Level 3 may relax this 67 * sequence, but you may give up some intelligence in the DOM by 68 * doing so; the intent was that knowing the document type and root 69 * element might let the DOM automatically switch to a specialized 70 * subclass for particular kinds of documents.) 71 * 72 * @param isSecureProcessing state of the secure processing feature. 73 * @return The newly created DOM Document object, with no children, or 74 * null if we can't find a DOM implementation that permits creating 75 * new empty Documents. 76 */ 77 public static Document createDocument(boolean isSecureProcessing) 78 { 79 80 try 81 { 82 83 // Use an implementation of the JAVA API for XML Parsing 1.0 to 84 // create a DOM Document node to contain the result. 
85 DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); 86 87 dfactory.setNamespaceAware(true); 88 // BEGIN android-removed 89 // If set, DocumentBuilderFactoryImpl.newDocumentBuilder() fails 90 // because we haven't implemented validation 91 // dfactory.setValidating(true); 92 // BEGIN android-removed 93 94 // BEGIN android-removed 95 // We haven't implemented secure processing 96 // if (isSecureProcessing) 97 // { 98 // try 99 // { 100 // dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 101 // } 102 // catch (ParserConfigurationException pce) {} 103 // } 104 // END android-removed 105 106 DocumentBuilder docBuilder = dfactory.newDocumentBuilder(); 107 Document outNode = docBuilder.newDocument(); 108 109 return outNode; 110 } 111 catch (ParserConfigurationException pce) 112 { 113 throw new RuntimeException( 114 XMLMessages.createXMLMessage( 115 XMLErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED, null)); //"createDocument() not supported in XPathContext!"); 116 117 // return null; 118 } 119 } 120 121 /** 122 * DOM Level 1 did not have a standard mechanism for creating a new 123 * Document object. This function provides a DOM-implementation-independent 124 * abstraction for that for that concept. It's typically used when 125 * outputting a new DOM as the result of an operation. 126 * 127 * @return The newly created DOM Document object, with no children, or 128 * null if we can't find a DOM implementation that permits creating 129 * new empty Documents. 130 */ 131 public static Document createDocument() 132 { 133 return createDocument(false); 134 } 135 136 /** 137 * Tells, through the combination of the default-space attribute 138 * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the 139 * xml:space attribute, whether or not extra whitespace should be stripped 140 * from the node. Literal elements from template elements should 141 * <em>not</em> be tested with this function. 
142 * @param textNode A text node from the source tree. 143 * @return true if the text node should be stripped of extra whitespace. 144 * 145 * @throws javax.xml.transform.TransformerException 146 * @xsl.usage advanced 147 */ 148 public boolean shouldStripSourceNode(Node textNode) 149 throws javax.xml.transform.TransformerException 150 { 151 152 // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode); 153 return false; 154 } 155 156 /** 157 * Supports the XPath function GenerateID by returning a unique 158 * identifier string for any given DOM Node. 159 * <p> 160 * Warning: The base implementation uses the Node object's hashCode(), 161 * which is NOT guaranteed to be unique. If that method hasn't been 162 * overridden in this DOM ipmlementation, most Java implementions will 163 * derive it from the object's address and should be OK... but if 164 * your DOM uses a different definition of hashCode (eg hashing the 165 * contents of the subtree), or if your DOM may have multiple objects 166 * that represent a single Node in the data structure (eg via proxying), 167 * you may need to find another way to assign a unique identifier. 168 * <p> 169 * Also, be aware that if nodes are destroyed and recreated, there is 170 * an open issue regarding whether an ID may be reused. Currently 171 * we're assuming that the input document is stable for the duration 172 * of the XPath/XSLT operation, so this shouldn't arise in this context. 173 * <p> 174 * (DOM Level 3 is investigating providing a unique node "key", but 175 * that won't help Level 1 and Level 2 implementations.) 176 * 177 * @param node whose identifier you want to obtain 178 * 179 * @return a string which should be different for every Node object. 
180 */ 181 public String getUniqueID(Node node) 182 { 183 return "N" + Integer.toHexString(node.hashCode()).toUpperCase(); 184 } 185 186 /** 187 * Figure out whether node2 should be considered as being later 188 * in the document than node1, in Document Order as defined 189 * by the XPath model. This may not agree with the ordering defined 190 * by other XML applications. 191 * <p> 192 * There are some cases where ordering isn't defined, and neither are 193 * the results of this function -- though we'll generally return true. 194 * 195 * TODO: Make sure this does the right thing with attribute nodes!!! 196 * 197 * @param node1 DOM Node to perform position comparison on. 198 * @param node2 DOM Node to perform position comparison on . 199 * 200 * @return false if node2 comes before node1, otherwise return true. 201 * You can think of this as 202 * <code>(node1.documentOrderPosition <= node2.documentOrderPosition)</code>. 203 */ 204 public static boolean isNodeAfter(Node node1, Node node2) 205 { 206 if (node1 == node2 || isNodeTheSame(node1, node2)) 207 return true; 208 209 // Default return value, if there is no defined ordering 210 boolean isNodeAfter = true; 211 212 Node parent1 = getParentOfNode(node1); 213 Node parent2 = getParentOfNode(node2); 214 215 // Optimize for most common case 216 if (parent1 == parent2 || isNodeTheSame(parent1, parent2)) // then we know they are siblings 217 { 218 if (null != parent1) 219 isNodeAfter = isNodeAfterSibling(parent1, node1, node2); 220 else 221 { 222 // If both parents are null, ordering is not defined. 223 // We're returning a value in lieu of throwing an exception. 224 // Not a case we expect to arise in XPath, but beware if you 225 // try to reuse this method. 226 227 // We can just fall through in this case, which allows us 228 // to hit the debugging code at the end of the function. 
229 //return isNodeAfter; 230 } 231 } 232 else 233 { 234 235 // General strategy: Figure out the lengths of the two 236 // ancestor chains, reconcile the lengths, and look for 237 // the lowest common ancestor. If that ancestor is one of 238 // the nodes being compared, it comes before the other. 239 // Otherwise perform a sibling compare. 240 // 241 // NOTE: If no common ancestor is found, ordering is undefined 242 // and we return the default value of isNodeAfter. 243 244 // Count parents in each ancestor chain 245 int nParents1 = 2, nParents2 = 2; // include node & parent obtained above 246 247 while (parent1 != null) 248 { 249 nParents1++; 250 251 parent1 = getParentOfNode(parent1); 252 } 253 254 while (parent2 != null) 255 { 256 nParents2++; 257 258 parent2 = getParentOfNode(parent2); 259 } 260 261 // Initially assume scan for common ancestor starts with 262 // the input nodes. 263 Node startNode1 = node1, startNode2 = node2; 264 265 // If one ancestor chain is longer, adjust its start point 266 // so we're comparing at the same depths 267 if (nParents1 < nParents2) 268 { 269 // Adjust startNode2 to depth of startNode1 270 int adjust = nParents2 - nParents1; 271 272 for (int i = 0; i < adjust; i++) 273 { 274 startNode2 = getParentOfNode(startNode2); 275 } 276 } 277 else if (nParents1 > nParents2) 278 { 279 // adjust startNode1 to depth of startNode2 280 int adjust = nParents1 - nParents2; 281 282 for (int i = 0; i < adjust; i++) 283 { 284 startNode1 = getParentOfNode(startNode1); 285 } 286 } 287 288 Node prevChild1 = null, prevChild2 = null; // so we can "back up" 289 290 // Loop up the ancestor chain looking for common parent 291 while (null != startNode1) 292 { 293 if (startNode1 == startNode2 || isNodeTheSame(startNode1, startNode2)) // common parent? 294 { 295 if (null == prevChild1) // first time in loop? 296 { 297 298 // Edge condition: one is the ancestor of the other. 299 isNodeAfter = (nParents1 < nParents2) ? 
true : false; 300 301 break; // from while loop 302 } 303 else 304 { 305 // Compare ancestors below lowest-common as siblings 306 isNodeAfter = isNodeAfterSibling(startNode1, prevChild1, 307 prevChild2); 308 309 break; // from while loop 310 } 311 } // end if(startNode1 == startNode2) 312 313 // Move up one level and try again 314 prevChild1 = startNode1; 315 startNode1 = getParentOfNode(startNode1); 316 prevChild2 = startNode2; 317 startNode2 = getParentOfNode(startNode2); 318 } // end while(parents exist to examine) 319 } // end big else (not immediate siblings) 320 321 // WARNING: The following diagnostic won't report the early 322 // "same node" case. Fix if/when needed. 323 324 /* -- please do not remove... very useful for diagnostics -- 325 System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+ 326 ", node2 = "+node2.getNodeName() 327 +"("+node2.getNodeType()+")"+ 328 ", isNodeAfter = "+isNodeAfter); */ 329 return isNodeAfter; 330 } // end isNodeAfter(Node node1, Node node2) 331 332 /** 333 * Use DTMNodeProxy to determine whether two nodes are the same. 334 * 335 * @param node1 The first DOM node to compare. 336 * @param node2 The second DOM node to compare. 337 * @return true if the two nodes are the same. 338 */ 339 public static boolean isNodeTheSame(Node node1, Node node2) 340 { 341 if (node1 instanceof DTMNodeProxy && node2 instanceof DTMNodeProxy) 342 return ((DTMNodeProxy)node1).equals((DTMNodeProxy)node2); 343 else 344 return (node1 == node2); 345 } 346 347 /** 348 * Figure out if child2 is after child1 in document order. 349 * <p> 350 * Warning: Some aspects of "document order" are not well defined. 351 * For example, the order of attributes is considered 352 * meaningless in XML, and the order reported by our model will 353 * be consistant for a given invocation but may not 354 * match that of either the source file or the serialized output. 355 * 356 * @param parent Must be the parent of both child1 and child2. 
357 * @param child1 Must be the child of parent and not equal to child2. 358 * @param child2 Must be the child of parent and not equal to child1. 359 * @return true if child 2 is after child1 in document order. 360 */ 361 private static boolean isNodeAfterSibling(Node parent, Node child1, 362 Node child2) 363 { 364 365 boolean isNodeAfterSibling = false; 366 short child1type = child1.getNodeType(); 367 short child2type = child2.getNodeType(); 368 369 if ((Node.ATTRIBUTE_NODE != child1type) 370 && (Node.ATTRIBUTE_NODE == child2type)) 371 { 372 373 // always sort attributes before non-attributes. 374 isNodeAfterSibling = false; 375 } 376 else if ((Node.ATTRIBUTE_NODE == child1type) 377 && (Node.ATTRIBUTE_NODE != child2type)) 378 { 379 380 // always sort attributes before non-attributes. 381 isNodeAfterSibling = true; 382 } 383 else if (Node.ATTRIBUTE_NODE == child1type) 384 { 385 NamedNodeMap children = parent.getAttributes(); 386 int nNodes = children.getLength(); 387 boolean found1 = false, found2 = false; 388 389 // Count from the start until we find one or the other. 390 for (int i = 0; i < nNodes; i++) 391 { 392 Node child = children.item(i); 393 394 if (child1 == child || isNodeTheSame(child1, child)) 395 { 396 if (found2) 397 { 398 isNodeAfterSibling = false; 399 400 break; 401 } 402 403 found1 = true; 404 } 405 else if (child2 == child || isNodeTheSame(child2, child)) 406 { 407 if (found1) 408 { 409 isNodeAfterSibling = true; 410 411 break; 412 } 413 414 found2 = true; 415 } 416 } 417 } 418 else 419 { 420 // TODO: Check performance of alternate solution: 421 // There are two choices here: Count from the start of 422 // the document until we find one or the other, or count 423 // from one until we find or fail to find the other. 424 // Either can wind up scanning all the siblings in the worst 425 // case, which on a wide document can be a lot of work but 426 // is more typically is a short list. 
427 // Scanning from the start involves two tests per iteration, 428 // but it isn't clear that scanning from the middle doesn't 429 // yield more iterations on average. 430 // We should run some testcases. 431 Node child = parent.getFirstChild(); 432 boolean found1 = false, found2 = false; 433 434 while (null != child) 435 { 436 437 // Node child = children.item(i); 438 if (child1 == child || isNodeTheSame(child1, child)) 439 { 440 if (found2) 441 { 442 isNodeAfterSibling = false; 443 444 break; 445 } 446 447 found1 = true; 448 } 449 else if (child2 == child || isNodeTheSame(child2, child)) 450 { 451 if (found1) 452 { 453 isNodeAfterSibling = true; 454 455 break; 456 } 457 458 found2 = true; 459 } 460 461 child = child.getNextSibling(); 462 } 463 } 464 465 return isNodeAfterSibling; 466 } // end isNodeAfterSibling(Node parent, Node child1, Node child2) 467 468 //========================================================== 469 // SECTION: Namespace resolution 470 //========================================================== 471 472 /** 473 * Get the depth level of this node in the tree (equals 1 for 474 * a parentless node). 475 * 476 * @param n Node to be examined. 477 * @return the number of ancestors, plus one 478 * @xsl.usage internal 479 */ 480 public short getLevel(Node n) 481 { 482 483 short level = 1; 484 485 while (null != (n = getParentOfNode(n))) 486 { 487 level++; 488 } 489 490 return level; 491 } 492 493 /** 494 * Given an XML Namespace prefix and a context in which the prefix 495 * is to be evaluated, return the Namespace Name this prefix was 496 * bound to. Note that DOM Level 3 is expected to provide a version of 497 * this which deals with the DOM's "early binding" behavior. 498 * 499 * Default handling: 500 * 501 * @param prefix String containing namespace prefix to be resolved, 502 * without the ':' which separates it from the localname when used 503 * in a Node Name. 
The empty sting signifies the default namespace 504 * at this point in the document. 505 * @param namespaceContext Element which provides context for resolution. 506 * (We could extend this to work for other nodes by first seeking their 507 * nearest Element ancestor.) 508 * 509 * @return a String containing the Namespace URI which this prefix 510 * represents in the specified context. 511 */ 512 public String getNamespaceForPrefix(String prefix, Element namespaceContext) 513 { 514 515 int type; 516 Node parent = namespaceContext; 517 String namespace = null; 518 519 if (prefix.equals("xml")) 520 { 521 namespace = QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec 522 } 523 else if(prefix.equals("xmlns")) 524 { 525 // Hardcoded in the DOM spec, expected to be adopted by 526 // Namespace spec. NOTE: Namespace declarations _must_ use 527 // the xmlns: prefix; other prefixes declared as belonging 528 // to this namespace will not be recognized and should 529 // probably be rejected by parsers as erroneous declarations. 530 namespace = "http://www.w3.org/2000/xmlns/"; 531 } 532 else 533 { 534 // Attribute name for this prefix's declaration 535 String declname=(prefix=="") 536 ? "xmlns" 537 : "xmlns:"+prefix; 538 539 // Scan until we run out of Elements or have resolved the namespace 540 while ((null != parent) && (null == namespace) 541 && (((type = parent.getNodeType()) == Node.ELEMENT_NODE) 542 || (type == Node.ENTITY_REFERENCE_NODE))) 543 { 544 if (type == Node.ELEMENT_NODE) 545 { 546 547 // Look for the appropriate Namespace Declaration attribute, 548 // either "xmlns:prefix" or (if prefix is "") "xmlns". 549 // TODO: This does not handle "implicit declarations" 550 // which may be created when the DOM is edited. DOM Level 551 // 3 will define how those should be interpreted. But 552 // this issue won't arise in freshly-parsed DOMs. 553 554 // NOTE: declname is set earlier, outside the loop. 
555 Attr attr=((Element)parent).getAttributeNode(declname); 556 if(attr!=null) 557 { 558 namespace = attr.getNodeValue(); 559 break; 560 } 561 } 562 563 parent = getParentOfNode(parent); 564 } 565 } 566 567 return namespace; 568 } 569 570 /** 571 * An experiment for the moment. 572 */ 573 Hashtable m_NSInfos = new Hashtable(); 574 575 /** Object to put into the m_NSInfos table that tells that a node has not been 576 * processed, but has xmlns namespace decls. */ 577 protected static final NSInfo m_NSInfoUnProcWithXMLNS = new NSInfo(false, 578 true); 579 580 /** Object to put into the m_NSInfos table that tells that a node has not been 581 * processed, but has no xmlns namespace decls. */ 582 protected static final NSInfo m_NSInfoUnProcWithoutXMLNS = new NSInfo(false, 583 false); 584 585 /** Object to put into the m_NSInfos table that tells that a node has not been 586 * processed, and has no xmlns namespace decls, and has no ancestor decls. */ 587 protected static final NSInfo m_NSInfoUnProcNoAncestorXMLNS = 588 new NSInfo(false, false, NSInfo.ANCESTORNOXMLNS); 589 590 /** Object to put into the m_NSInfos table that tells that a node has been 591 * processed, and has xmlns namespace decls. */ 592 protected static final NSInfo m_NSInfoNullWithXMLNS = new NSInfo(true, 593 true); 594 595 /** Object to put into the m_NSInfos table that tells that a node has been 596 * processed, and has no xmlns namespace decls. */ 597 protected static final NSInfo m_NSInfoNullWithoutXMLNS = new NSInfo(true, 598 false); 599 600 /** Object to put into the m_NSInfos table that tells that a node has been 601 * processed, and has no xmlns namespace decls. and has no ancestor decls. */ 602 protected static final NSInfo m_NSInfoNullNoAncestorXMLNS = 603 new NSInfo(true, false, NSInfo.ANCESTORNOXMLNS); 604 605 /** Vector of node (odd indexes) and NSInfos (even indexes) that tell if 606 * the given node is a candidate for ancestor namespace processing. 
*/
  // Filled by getNamespaceOfNode as consecutive pairs -- the node at
  // index i, the NSInfo recorded for it (possibly null) at index i + 1 --
  // and cleared again once that method's ancestor walk has finished.
  protected Vector m_candidateNoAncestorXMLNS = new Vector();

  /**
   * Returns the namespace of the given node. Differs from simply getting
   * the node's prefix and using getNamespaceForPrefix in that it attempts
   * to cache some of the data in NSINFO objects, to avoid repeated lookup.
   * TODO: Should we consider moving that logic into getNamespaceForPrefix?
   * <p>
   * For non-attribute nodes the result is read from and stored into
   * m_NSInfos; attribute nodes are always resolved afresh and are never
   * stored under their own key.
   *
   * @param n Node to be examined.
   *
   * @return String containing the Namespace Name (uri) for this node,
   * or null if no matching declaration was found (an attribute without a
   * prefix always yields null).
   * Note that this is undefined for any nodes other than Elements and
   * Attributes.
   */
  public String getNamespaceOfNode(Node n)
  {

    String namespaceOfPrefix;
    boolean hasProcessedNS;
    NSInfo nsInfo;
    short ntype = n.getNodeType();

    // Consult the cache, but only for non-attribute nodes.
    if (Node.ATTRIBUTE_NODE != ntype)
    {
      Object nsObj = m_NSInfos.get(n);  // return value

      nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
      hasProcessedNS = (nsInfo == null) ? false : nsInfo.m_hasProcessedNS;
    }
    else
    {
      hasProcessedNS = false;
      nsInfo = null;
    }

    if (hasProcessedNS)
    {
      // Already resolved on an earlier call: reuse the stored answer.
      namespaceOfPrefix = nsInfo.m_namespace;
    }
    else
    {
      namespaceOfPrefix = null;

      // Split the prefix off the qualified node name.
      String nodeName = n.getNodeName();
      int indexOfNSSep = nodeName.indexOf(':');
      String prefix;

      if (Node.ATTRIBUTE_NODE == ntype)
      {
        if (indexOfNSSep > 0)
        {
          prefix = nodeName.substring(0, indexOfNSSep);
        }
        else
        {

          // Attributes don't use the default namespace, so if
          // there isn't a prefix, we're done.
          return namespaceOfPrefix;
        }
      }
      else
      {
        // No colon means the default namespace, represented by "".
        prefix = (indexOfNSSep >= 0)
                 ? nodeName.substring(0, indexOfNSSep) : "";
      }

      // Set while walking: whether any visited element carried an xmlns
      // declaration, and whether n itself did.
      boolean ancestorsHaveXMLNS = false;
      boolean nHasXMLNS = false;

      if (prefix.equals("xml"))
      {
        // The "xml" prefix is fixed; no tree walk needed.
        namespaceOfPrefix = QName.S_XMLNAMESPACEURI;
      }
      else
      {
        int parentType;
        Node parent = n;

        // Walk from n upwards until the prefix is resolved or the chain ends.
        while ((null != parent) && (null == namespaceOfPrefix))
        {
          // Cached knowledge says nothing above here declares a namespace.
          if ((null != nsInfo)
                  && (nsInfo.m_ancestorHasXMLNSAttrs
                      == NSInfo.ANCESTORNOXMLNS))
          {
            break;
          }

          parentType = parent.getNodeType();

          // Inspect attributes only if nothing is cached for this node or
          // the cache says it does carry xmlns declarations.
          if ((null == nsInfo) || nsInfo.m_hasXMLNSAttrs)
          {
            boolean elementHasXMLNS = false;

            if (parentType == Node.ELEMENT_NODE)
            {
              NamedNodeMap nnm = parent.getAttributes();

              for (int i = 0; i < nnm.getLength(); i++)
              {
                Node attr = nnm.item(i);
                String aname = attr.getNodeName();

                // Cheap first-character filter before the string tests.
                if (aname.charAt(0) == 'x')
                {
                  boolean isPrefix = aname.startsWith("xmlns:");

                  if (aname.equals("xmlns") || isPrefix)
                  {
                    if (n == parent)
                      nHasXMLNS = true;

                    elementHasXMLNS = true;
                    ancestorsHaveXMLNS = true;

                    // Prefix being declared: text after "xmlns:", or ""
                    // for a default-namespace declaration.
                    String p = isPrefix ? aname.substring(6) : "";

                    if (p.equals(prefix))
                    {
                      namespaceOfPrefix = attr.getNodeValue();

                      break;
                    }
                  }
                }
              }
            }

            // Remember, for ancestors seen for the first time, whether
            // they carry xmlns declarations (marked as not yet processed).
            if ((Node.ATTRIBUTE_NODE != parentType) && (null == nsInfo)
                    && (n != parent))
            {
              nsInfo = elementHasXMLNS
                       ? m_NSInfoUnProcWithXMLNS : m_NSInfoUnProcWithoutXMLNS;

              m_NSInfos.put(parent, nsInfo);
            }
          }

          if (Node.ATTRIBUTE_NODE == parentType)
          {
            // Attrs have no DOM parent; use the XPath-model parent.
            parent = getParentOfNode(parent);
          }
          else
          {
            // Record the (node, info) pair: node first, then its NSInfo.
            m_candidateNoAncestorXMLNS.addElement(parent);
            m_candidateNoAncestorXMLNS.addElement(nsInfo);

            parent = parent.getParentNode();
          }

          // Pick up whatever is cached for the next node in the chain.
          if (null != parent)
          {
            Object nsObj = m_NSInfos.get(parent);  // return value

            nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
          }
        }

        int nCandidates = m_candidateNoAncestorXMLNS.size();

        if (nCandidates > 0)
        {
          // The walk reached the top without seeing any xmlns attribute:
          // upgrade the recorded "without XMLNS" entries to the
          // corresponding "no ancestor XMLNS" markers.
          if ((false == ancestorsHaveXMLNS) && (null == parent))
          {
            for (int i = 0; i < nCandidates; i += 2)
            {
              Object candidateInfo = m_candidateNoAncestorXMLNS.elementAt(i
                                       + 1);

              if (candidateInfo == m_NSInfoUnProcWithoutXMLNS)
              {
                m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
                              m_NSInfoUnProcNoAncestorXMLNS);
              }
              else if (candidateInfo == m_NSInfoNullWithoutXMLNS)
              {
                m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
                              m_NSInfoNullNoAncestorXMLNS);
              }
            }
          }

          m_candidateNoAncestorXMLNS.removeAllElements();
        }
      }

      // Cache the outcome for n itself (attribute nodes are never cached).
      if (Node.ATTRIBUTE_NODE != ntype)
      {
        if (null == namespaceOfPrefix)
        {
          if (ancestorsHaveXMLNS)
          {
            if (nHasXMLNS)
              m_NSInfos.put(n, m_NSInfoNullWithXMLNS);
            else
              m_NSInfos.put(n, m_NSInfoNullWithoutXMLNS);
          }
          else
          {
            m_NSInfos.put(n, m_NSInfoNullNoAncestorXMLNS);
          }
        }
        else
        {
          m_NSInfos.put(n, new NSInfo(namespaceOfPrefix, nHasXMLNS));
        }
      }
    }

    return namespaceOfPrefix;
  }

  /**
   * Returns the local name of the given node. If the node's name begins
   * with a namespace prefix, this is the part after the colon; otherwise
   * it's the full node name.
   *
   * @param n the node to be examined.
   *
   * @return String containing the Local Name
   */
  public String getLocalNameOfNode(Node n)
  {

    String qname = n.getNodeName();
    int index = qname.indexOf(':');

    // Everything after the first colon, or the whole name if there is none.
    return (index < 0) ? qname : qname.substring(index + 1);
  }

  /**
   * Returns the element name with the namespace prefix (if any) replaced
   * by the Namespace URI it was bound to. This is not a standard
   * representation of a node name, but it allows convenient
   * single-string comparison of the "universal" names of two nodes.
842 * 843 * @param elem Element to be examined. 844 * 845 * @return String in the form "namespaceURI:localname" if the node 846 * belongs to a namespace, or simply "localname" if it doesn't. 847 * @see #getExpandedAttributeName 848 */ 849 public String getExpandedElementName(Element elem) 850 { 851 852 String namespace = getNamespaceOfNode(elem); 853 854 return (null != namespace) 855 ? namespace + ":" + getLocalNameOfNode(elem) 856 : getLocalNameOfNode(elem); 857 } 858 859 /** 860 * Returns the attribute name with the namespace prefix (if any) replaced 861 * by the Namespace URI it was bound to. This is not a standard 862 * representation of a node name, but it allows convenient 863 * single-string comparison of the "universal" names of two nodes. 864 * 865 * @param attr Attr to be examined 866 * 867 * @return String in the form "namespaceURI:localname" if the node 868 * belongs to a namespace, or simply "localname" if it doesn't. 869 * @see #getExpandedElementName 870 */ 871 public String getExpandedAttributeName(Attr attr) 872 { 873 874 String namespace = getNamespaceOfNode(attr); 875 876 return (null != namespace) 877 ? namespace + ":" + getLocalNameOfNode(attr) 878 : getLocalNameOfNode(attr); 879 } 880 881 //========================================================== 882 // SECTION: DOM Helper Functions 883 //========================================================== 884 885 /** 886 * Tell if the node is ignorable whitespace. Note that this can 887 * be determined only in the context of a DTD or other Schema, 888 * and that DOM Level 2 has nostandardized DOM API which can 889 * return that information. 890 * @deprecated 891 * 892 * @param node Node to be examined 893 * 894 * @return CURRENTLY HARDCODED TO FALSE, but should return true if 895 * and only if the node is of type Text, contains only whitespace, 896 * and does not appear as part of the #PCDATA content of an element. 
897 * (Note that determining this last may require allowing for 898 * Entity References.) 899 */ 900 public boolean isIgnorableWhitespace(Text node) 901 { 902 903 boolean isIgnorable = false; // return value 904 905 // TODO: I can probably do something to figure out if this 906 // space is ignorable from just the information in 907 // the DOM tree. 908 // -- You need to be able to distinguish whitespace 909 // that is #PCDATA from whitespace that isn't. That requires 910 // DTD support, which won't be standardized until DOM Level 3. 911 return isIgnorable; 912 } 913 914 /** 915 * Get the first unparented node in the ancestor chain. 916 * @deprecated 917 * 918 * @param node Starting node, to specify which chain to chase 919 * 920 * @return the topmost ancestor. 921 */ 922 public Node getRoot(Node node) 923 { 924 925 Node root = null; 926 927 while (node != null) 928 { 929 root = node; 930 node = getParentOfNode(node); 931 } 932 933 return root; 934 } 935 936 /** 937 * Get the root node of the document tree, regardless of 938 * whether or not the node passed in is a document node. 939 * <p> 940 * TODO: This doesn't handle DocumentFragments or "orphaned" subtrees 941 * -- it's currently returning ownerDocument even when the tree is 942 * not actually part of the main Document tree. We should either 943 * rewrite the description to say that it finds the Document node, 944 * or change the code to walk up the ancestor chain. 945 946 * 947 * @param n Node to be examined 948 * 949 * @return the Document node. Note that this is not the correct answer 950 * if n was (or was a child of) a DocumentFragment or an orphaned node, 951 * as can arise if the DOM has been edited rather than being generated 952 * by a parser. 953 */ 954 public Node getRootNode(Node n) 955 { 956 int nt = n.getNodeType(); 957 return ( (Node.DOCUMENT_NODE == nt) || (Node.DOCUMENT_FRAGMENT_NODE == nt) ) 958 ? 
n : n.getOwnerDocument(); 959 } 960 961 /** 962 * Test whether the given node is a namespace decl node. In DOM Level 2 963 * this can be done in a namespace-aware manner, but in Level 1 DOMs 964 * it has to be done by testing the node name. 965 * 966 * @param n Node to be examined. 967 * 968 * @return boolean -- true iff the node is an Attr whose name is 969 * "xmlns" or has the "xmlns:" prefix. 970 */ 971 public boolean isNamespaceNode(Node n) 972 { 973 974 if (Node.ATTRIBUTE_NODE == n.getNodeType()) 975 { 976 String attrName = n.getNodeName(); 977 978 return (attrName.startsWith("xmlns:") || attrName.equals("xmlns")); 979 } 980 981 return false; 982 } 983 984 /** 985 * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs, 986 * parent for other nodes. 987 * <p> 988 * Background: The DOM believes that you must be your Parent's 989 * Child, and thus Attrs don't have parents. XPath said that Attrs 990 * do have their owning Element as their parent. This function 991 * bridges the difference, either by using the DOM Level 2 ownerElement 992 * function or by using a "silly and expensive function" in Level 1 993 * DOMs. 994 * <p> 995 * (There's some discussion of future DOMs generalizing ownerElement 996 * into ownerNode and making it work on all types of nodes. This 997 * still wouldn't help the users of Level 1 or Level 2 DOMs) 998 * <p> 999 * 1000 * @param node Node whose XPath parent we want to obtain 1001 * 1002 * @return the parent of the node, or the ownerElement if it's an 1003 * Attr node, or null if the node is an orphan. 1004 * 1005 * @throws RuntimeException if the Document has no root element. 
1006 * This can't arise if the Document was created 1007 * via the DOM Level 2 factory methods, but is possible if other 1008 * mechanisms were used to obtain it 1009 */ 1010 public static Node getParentOfNode(Node node) throws RuntimeException 1011 { 1012 Node parent; 1013 short nodeType = node.getNodeType(); 1014 1015 if (Node.ATTRIBUTE_NODE == nodeType) 1016 { 1017 Document doc = node.getOwnerDocument(); 1018 /* 1019 TBD: 1020 if(null == doc) 1021 { 1022 throw new RuntimeException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT, null));//"Attribute child does not have an owner document!"); 1023 } 1024 */ 1025 1026 // Given how expensive the tree walk may be, we should first ask 1027 // whether this DOM can answer the question for us. The additional 1028 // test does slow down Level 1 DOMs slightly. DOMHelper2, which 1029 // is currently specialized for Xerces, assumes it can use the 1030 // Level 2 solution. We might want to have an intermediate stage, 1031 // which would assume DOM Level 2 but not assume Xerces. 1032 // 1033 // (Shouldn't have to check whether impl is null in a compliant DOM, 1034 // but let's be paranoid for a moment...) 1035 DOMImplementation impl=doc.getImplementation(); 1036 if(impl!=null && impl.hasFeature("Core","2.0")) 1037 { 1038 parent=((Attr)node).getOwnerElement(); 1039 return parent; 1040 } 1041 1042 // DOM Level 1 solution, as fallback. Hugely expensive. 
1043 1044 Element rootElem = doc.getDocumentElement(); 1045 1046 if (null == rootElem) 1047 { 1048 throw new RuntimeException( 1049 XMLMessages.createXMLMessage( 1050 XMLErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT, 1051 null)); //"Attribute child does not have an owner document element!"); 1052 } 1053 1054 parent = locateAttrParent(rootElem, node); 1055 1056 } 1057 else 1058 { 1059 parent = node.getParentNode(); 1060 1061 // if((Node.DOCUMENT_NODE != nodeType) && (null == parent)) 1062 // { 1063 // throw new RuntimeException("Child does not have parent!"); 1064 // } 1065 } 1066 1067 return parent; 1068 } 1069 1070 /** 1071 * Given an ID, return the element. This can work only if the document 1072 * is interpreted in the context of a DTD or Schema, since otherwise 1073 * we don't know which attributes are or aren't IDs. 1074 * <p> 1075 * Note that DOM Level 1 had no ability to retrieve this information. 1076 * DOM Level 2 introduced it but does not promise that it will be 1077 * supported in all DOMs; those which can't support it will always 1078 * return null. 1079 * <p> 1080 * TODO: getElementByID is currently unimplemented. Support DOM Level 2? 1081 * 1082 * @param id The unique identifier to be searched for. 1083 * @param doc The document to search within. 1084 * @return CURRENTLY HARDCODED TO NULL, but it should be: 1085 * The node which has this unique identifier, or null if there 1086 * is no such node or this DOM can't reliably recognize it. 1087 */ 1088 public Element getElementByID(String id, Document doc) 1089 { 1090 return null; 1091 } 1092 1093 /** 1094 * The getUnparsedEntityURI function returns the URI of the unparsed 1095 * entity with the specified name in the same document as the context 1096 * node (see [3.3 Unparsed Entities]). It returns the empty string if 1097 * there is no such entity. 
* <p>
   * XML processors may choose to use the System Identifier (if one
   * is provided) to resolve the entity, rather than the URI in the
   * Public Identifier. The details are dependent on the processor, and
   * we would have to support some form of plug-in resolver to handle
   * this properly. Currently, we simply return the System Identifier if
   * present, and hope that it is a usable URI or that our caller can
   * map it to one.
   * TODO: Resolve Public Identifiers... or consider changing function name.
   * <p>
   * If we find a relative URI
   * reference, XML expects it to be resolved in terms of the base URI
   * of the document. The DOM doesn't do that for us, and it isn't
   * entirely clear whether that should be done here; currently that's
   * pushed up to a higher level of our application. (Note that DOM Level
   * 1 didn't store the document's base URI.)
   * TODO: Consider resolving Relative URIs.
   * <p>
   * (The DOM's statement that "An XML processor may choose to
   * completely expand entities before the structure model is passed
   * to the DOM" refers only to parsed entities, not unparsed, and hence
   * doesn't affect this function.)
   *
   * @param name A string containing the Entity Name of the unparsed
   * entity.
   * @param doc Document node for the document to be searched.
   *
   * @return String containing the URI of the Unparsed Entity, or an
   * empty string if no such entity exists.
1127 */ 1128 public String getUnparsedEntityURI(String name, Document doc) 1129 { 1130 1131 String url = ""; 1132 DocumentType doctype = doc.getDoctype(); 1133 1134 if (null != doctype) 1135 { 1136 NamedNodeMap entities = doctype.getEntities(); 1137 if(null == entities) 1138 return url; 1139 Entity entity = (Entity) entities.getNamedItem(name); 1140 if(null == entity) 1141 return url; 1142 1143 String notationName = entity.getNotationName(); 1144 1145 if (null != notationName) // then it's unparsed 1146 { 1147 // The draft says: "The XSLT processor may use the public 1148 // identifier to generate a URI for the entity instead of the URI 1149 // specified in the system identifier. If the XSLT processor does 1150 // not use the public identifier to generate the URI, it must use 1151 // the system identifier; if the system identifier is a relative 1152 // URI, it must be resolved into an absolute URI using the URI of 1153 // the resource containing the entity declaration as the base 1154 // URI [RFC2396]." 1155 // So I'm falling a bit short here. 1156 url = entity.getSystemId(); 1157 1158 if (null == url) 1159 { 1160 url = entity.getPublicId(); 1161 } 1162 else 1163 { 1164 // This should be resolved to an absolute URL, but that's hard 1165 // to do from here. 1166 } 1167 } 1168 } 1169 1170 return url; 1171 } 1172 1173 /** 1174 * Support for getParentOfNode; walks a DOM tree until it finds 1175 * the Element which owns the Attr. This is hugely expensive, and 1176 * if at all possible you should use the DOM Level 2 Attr.ownerElement() 1177 * method instead. 1178 * <p> 1179 * The DOM Level 1 developers expected that folks would keep track 1180 * of the last Element they'd seen and could recover the info from 1181 * that source. Obviously that doesn't work very well if the only 1182 * information you've been presented with is the Attr. The DOM Level 2 1183 * getOwnerElement() method fixes that, but only for Level 2 and 1184 * later DOMs. 
1185 * 1186 * @param elem Element whose subtree is to be searched for this Attr 1187 * @param attr Attr whose owner is to be located. 1188 * 1189 * @return the first Element whose attribute list includes the provided 1190 * attr. In modern DOMs, this will also be the only such Element. (Early 1191 * DOMs had some hope that Attrs might be sharable, but this idea has 1192 * been abandoned.) 1193 */ 1194 private static Node locateAttrParent(Element elem, Node attr) 1195 { 1196 1197 Node parent = null; 1198 1199 // This should only be called for Level 1 DOMs, so we don't have to 1200 // worry about namespace issues. In later levels, it's possible 1201 // for a DOM to have two Attrs with the same NodeName but 1202 // different namespaces, and we'd need to get getAttributeNodeNS... 1203 // but later levels also have Attr.getOwnerElement. 1204 Attr check=elem.getAttributeNode(attr.getNodeName()); 1205 if(check==attr) 1206 parent = elem; 1207 1208 if (null == parent) 1209 { 1210 for (Node node = elem.getFirstChild(); null != node; 1211 node = node.getNextSibling()) 1212 { 1213 if (Node.ELEMENT_NODE == node.getNodeType()) 1214 { 1215 parent = locateAttrParent((Element) node, attr); 1216 1217 if (null != parent) 1218 break; 1219 } 1220 } 1221 } 1222 1223 return parent; 1224 } 1225 1226 /** 1227 * The factory object used for creating nodes 1228 * in the result tree. 1229 */ 1230 protected Document m_DOMFactory = null; 1231 1232 /** 1233 * Store the factory object required to create DOM nodes 1234 * in the result tree. In fact, that's just the result tree's 1235 * Document node... 1236 * 1237 * @param domFactory The DOM Document Node within whose context 1238 * the result tree will be built. 1239 */ 1240 public void setDOMFactory(Document domFactory) 1241 { 1242 this.m_DOMFactory = domFactory; 1243 } 1244 1245 /** 1246 * Retrieve the factory object required to create DOM nodes 1247 * in the result tree. 1248 * 1249 * @return The result tree's DOM Document Node. 
1250 */ 1251 public Document getDOMFactory() 1252 { 1253 1254 if (null == this.m_DOMFactory) 1255 { 1256 this.m_DOMFactory = createDocument(); 1257 } 1258 1259 return this.m_DOMFactory; 1260 } 1261 1262 /** 1263 * Get the textual contents of the node. See 1264 * getNodeData(Node,FastStringBuffer) for discussion of how 1265 * whitespace nodes are handled. 1266 * 1267 * @param node DOM Node to be examined 1268 * @return String containing a concatenation of all the 1269 * textual content within that node. 1270 * @see #getNodeData(Node,FastStringBuffer) 1271 * 1272 */ 1273 public static String getNodeData(Node node) 1274 { 1275 1276 FastStringBuffer buf = StringBufferPool.get(); 1277 String s; 1278 1279 try 1280 { 1281 getNodeData(node, buf); 1282 1283 s = (buf.length() > 0) ? buf.toString() : ""; 1284 } 1285 finally 1286 { 1287 StringBufferPool.free(buf); 1288 } 1289 1290 return s; 1291 } 1292 1293 /** 1294 * Retrieve the text content of a DOM subtree, appending it into a 1295 * user-supplied FastStringBuffer object. Note that attributes are 1296 * not considered part of the content of an element. 1297 * <p> 1298 * There are open questions regarding whitespace stripping. 1299 * Currently we make no special effort in that regard, since the standard 1300 * DOM doesn't yet provide DTD-based information to distinguish 1301 * whitespace-in-element-context from genuine #PCDATA. Note that we 1302 * should probably also consider xml:space if/when we address this. 1303 * DOM Level 3 may solve the problem for us. 1304 * 1305 * @param node Node whose subtree is to be walked, gathering the 1306 * contents of all Text or CDATASection nodes. 1307 * @param buf FastStringBuffer into which the contents of the text 1308 * nodes are to be concatenated. 
1309 */ 1310 public static void getNodeData(Node node, FastStringBuffer buf) 1311 { 1312 1313 switch (node.getNodeType()) 1314 { 1315 case Node.DOCUMENT_FRAGMENT_NODE : 1316 case Node.DOCUMENT_NODE : 1317 case Node.ELEMENT_NODE : 1318 { 1319 for (Node child = node.getFirstChild(); null != child; 1320 child = child.getNextSibling()) 1321 { 1322 getNodeData(child, buf); 1323 } 1324 } 1325 break; 1326 case Node.TEXT_NODE : 1327 case Node.CDATA_SECTION_NODE : 1328 buf.append(node.getNodeValue()); 1329 break; 1330 case Node.ATTRIBUTE_NODE : 1331 buf.append(node.getNodeValue()); 1332 break; 1333 case Node.PROCESSING_INSTRUCTION_NODE : 1334 // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); 1335 break; 1336 default : 1337 // ignore 1338 break; 1339 } 1340 } 1341} 1342