1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: DOMHelper.java 468655 2006-10-28 07:12:06Z minchau $
20 */
21package org.apache.xml.utils;
22
23import java.util.Hashtable;
24import java.util.Vector;
25
26import javax.xml.XMLConstants;
27import javax.xml.parsers.DocumentBuilder;
28import javax.xml.parsers.DocumentBuilderFactory;
29import javax.xml.parsers.ParserConfigurationException;
30
31import org.apache.xml.dtm.ref.DTMNodeProxy;
32import org.apache.xml.res.XMLErrorResources;
33import org.apache.xml.res.XMLMessages;
34
35import org.w3c.dom.Attr;
36import org.w3c.dom.DOMImplementation;
37import org.w3c.dom.Document;
38import org.w3c.dom.DocumentType;
39import org.w3c.dom.Element;
40import org.w3c.dom.Entity;
41import org.w3c.dom.NamedNodeMap;
42import org.w3c.dom.Node;
43import org.w3c.dom.Text;
44
45/**
46 * @deprecated Since the introduction of the DTM, this class will be removed.
47 * This class provides a front-end to DOM implementations, providing
48 * a number of utility functions that either aren't yet standardized
49 * by the DOM spec or that are defined in optional DOM modules and
50 * hence may not be present in all DOMs.
51 */
52public class DOMHelper
53{
54
55  /**
56   * DOM Level 1 did not have a standard mechanism for creating a new
57   * Document object. This function provides a DOM-implementation-independent
58   * abstraction for that for that concept. It's typically used when
59   * outputting a new DOM as the result of an operation.
60   * <p>
61   * TODO: This isn't directly compatable with DOM Level 2.
62   * The Level 2 createDocument call also creates the root
63   * element, and thus requires that you know what that element will be
64   * before creating the Document. We should think about whether we want
65   * to change this code, and the callers, so we can use the DOM's own
66   * method. (It's also possible that DOM Level 3 may relax this
67   * sequence, but you may give up some intelligence in the DOM by
68   * doing so; the intent was that knowing the document type and root
69   * element might let the DOM automatically switch to a specialized
70   * subclass for particular kinds of documents.)
71   *
72   * @param isSecureProcessing state of the secure processing feature.
73   * @return The newly created DOM Document object, with no children, or
74   * null if we can't find a DOM implementation that permits creating
75   * new empty Documents.
76   */
77  public static Document createDocument(boolean isSecureProcessing)
78  {
79
80    try
81    {
82
83      // Use an implementation of the JAVA API for XML Parsing 1.0 to
84      // create a DOM Document node to contain the result.
85      DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
86
87      dfactory.setNamespaceAware(true);
88      // BEGIN android-removed
89      //     If set, DocumentBuilderFactoryImpl.newDocumentBuilder() fails
90      //     because we haven't implemented validation
91      // dfactory.setValidating(true);
92      // BEGIN android-removed
93
94      // BEGIN android-removed
95      //     We haven't implemented secure processing
96      // if (isSecureProcessing)
97      // {
98      //   try
99      //   {
100      //     dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
101      //   }
102      //   catch (ParserConfigurationException pce) {}
103      // }
104      // END android-removed
105
106      DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
107      Document outNode = docBuilder.newDocument();
108
109      return outNode;
110    }
111    catch (ParserConfigurationException pce)
112    {
113      throw new RuntimeException(
114        XMLMessages.createXMLMessage(
115          XMLErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED, null));  //"createDocument() not supported in XPathContext!");
116
117      // return null;
118    }
119  }
120
121  /**
122   * DOM Level 1 did not have a standard mechanism for creating a new
123   * Document object. This function provides a DOM-implementation-independent
124   * abstraction for that for that concept. It's typically used when
125   * outputting a new DOM as the result of an operation.
126   *
127   * @return The newly created DOM Document object, with no children, or
128   * null if we can't find a DOM implementation that permits creating
129   * new empty Documents.
130   */
131  public static Document createDocument()
132  {
133    return createDocument(false);
134  }
135
136  /**
137   * Tells, through the combination of the default-space attribute
138   * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the
139   * xml:space attribute, whether or not extra whitespace should be stripped
140   * from the node.  Literal elements from template elements should
141   * <em>not</em> be tested with this function.
142   * @param textNode A text node from the source tree.
143   * @return true if the text node should be stripped of extra whitespace.
144   *
145   * @throws javax.xml.transform.TransformerException
146   * @xsl.usage advanced
147   */
148  public boolean shouldStripSourceNode(Node textNode)
149          throws javax.xml.transform.TransformerException
150  {
151
152    // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode);
153    return false;
154  }
155
156  /**
157   * Supports the XPath function GenerateID by returning a unique
158   * identifier string for any given DOM Node.
159   * <p>
160   * Warning: The base implementation uses the Node object's hashCode(),
161   * which is NOT guaranteed to be unique. If that method hasn't been
162   * overridden in this DOM ipmlementation, most Java implementions will
163   * derive it from the object's address and should be OK... but if
164   * your DOM uses a different definition of hashCode (eg hashing the
165   * contents of the subtree), or if your DOM may have multiple objects
166   * that represent a single Node in the data structure (eg via proxying),
167   * you may need to find another way to assign a unique identifier.
168   * <p>
169   * Also, be aware that if nodes are destroyed and recreated, there is
170   * an open issue regarding whether an ID may be reused. Currently
171   * we're assuming that the input document is stable for the duration
172   * of the XPath/XSLT operation, so this shouldn't arise in this context.
173   * <p>
174   * (DOM Level 3 is investigating providing a unique node "key", but
175   * that won't help Level 1 and Level 2 implementations.)
176   *
177   * @param node whose identifier you want to obtain
178   *
179   * @return a string which should be different for every Node object.
180   */
181  public String getUniqueID(Node node)
182  {
183    return "N" + Integer.toHexString(node.hashCode()).toUpperCase();
184  }
185
186  /**
187   * Figure out whether node2 should be considered as being later
188   * in the document than node1, in Document Order as defined
189   * by the XPath model. This may not agree with the ordering defined
190   * by other XML applications.
191   * <p>
192   * There are some cases where ordering isn't defined, and neither are
193   * the results of this function -- though we'll generally return true.
194   *
195   * TODO: Make sure this does the right thing with attribute nodes!!!
196   *
197   * @param node1 DOM Node to perform position comparison on.
198   * @param node2 DOM Node to perform position comparison on .
199   *
200   * @return false if node2 comes before node1, otherwise return true.
201   * You can think of this as
202   * <code>(node1.documentOrderPosition &lt;= node2.documentOrderPosition)</code>.
203   */
204  public static boolean isNodeAfter(Node node1, Node node2)
205  {
206    if (node1 == node2 || isNodeTheSame(node1, node2))
207      return true;
208
209        // Default return value, if there is no defined ordering
210    boolean isNodeAfter = true;
211
212    Node parent1 = getParentOfNode(node1);
213    Node parent2 = getParentOfNode(node2);
214
215    // Optimize for most common case
216    if (parent1 == parent2 || isNodeTheSame(parent1, parent2))  // then we know they are siblings
217    {
218      if (null != parent1)
219        isNodeAfter = isNodeAfterSibling(parent1, node1, node2);
220      else
221      {
222                  // If both parents are null, ordering is not defined.
223                  // We're returning a value in lieu of throwing an exception.
224                  // Not a case we expect to arise in XPath, but beware if you
225                  // try to reuse this method.
226
227                  // We can just fall through in this case, which allows us
228                  // to hit the debugging code at the end of the function.
229          //return isNodeAfter;
230      }
231    }
232    else
233    {
234
235      // General strategy: Figure out the lengths of the two
236      // ancestor chains, reconcile the lengths, and look for
237          // the lowest common ancestor. If that ancestor is one of
238          // the nodes being compared, it comes before the other.
239      // Otherwise perform a sibling compare.
240                //
241                // NOTE: If no common ancestor is found, ordering is undefined
242                // and we return the default value of isNodeAfter.
243
244      // Count parents in each ancestor chain
245      int nParents1 = 2, nParents2 = 2;  // include node & parent obtained above
246
247      while (parent1 != null)
248      {
249        nParents1++;
250
251        parent1 = getParentOfNode(parent1);
252      }
253
254      while (parent2 != null)
255      {
256        nParents2++;
257
258        parent2 = getParentOfNode(parent2);
259      }
260
261          // Initially assume scan for common ancestor starts with
262          // the input nodes.
263      Node startNode1 = node1, startNode2 = node2;
264
265      // If one ancestor chain is longer, adjust its start point
266          // so we're comparing at the same depths
267      if (nParents1 < nParents2)
268      {
269        // Adjust startNode2 to depth of startNode1
270        int adjust = nParents2 - nParents1;
271
272        for (int i = 0; i < adjust; i++)
273        {
274          startNode2 = getParentOfNode(startNode2);
275        }
276      }
277      else if (nParents1 > nParents2)
278      {
279        // adjust startNode1 to depth of startNode2
280        int adjust = nParents1 - nParents2;
281
282        for (int i = 0; i < adjust; i++)
283        {
284          startNode1 = getParentOfNode(startNode1);
285        }
286      }
287
288      Node prevChild1 = null, prevChild2 = null;  // so we can "back up"
289
290      // Loop up the ancestor chain looking for common parent
291      while (null != startNode1)
292      {
293        if (startNode1 == startNode2 || isNodeTheSame(startNode1, startNode2))  // common parent?
294        {
295          if (null == prevChild1)  // first time in loop?
296          {
297
298            // Edge condition: one is the ancestor of the other.
299            isNodeAfter = (nParents1 < nParents2) ? true : false;
300
301            break;  // from while loop
302          }
303          else
304          {
305                        // Compare ancestors below lowest-common as siblings
306            isNodeAfter = isNodeAfterSibling(startNode1, prevChild1,
307                                             prevChild2);
308
309            break;  // from while loop
310          }
311        }  // end if(startNode1 == startNode2)
312
313                // Move up one level and try again
314        prevChild1 = startNode1;
315        startNode1 = getParentOfNode(startNode1);
316        prevChild2 = startNode2;
317        startNode2 = getParentOfNode(startNode2);
318      }  // end while(parents exist to examine)
319    }  // end big else (not immediate siblings)
320
321        // WARNING: The following diagnostic won't report the early
322        // "same node" case. Fix if/when needed.
323
324    /* -- please do not remove... very useful for diagnostics --
325    System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+
326    ", node2 = "+node2.getNodeName()
327    +"("+node2.getNodeType()+")"+
328    ", isNodeAfter = "+isNodeAfter); */
329    return isNodeAfter;
330  }  // end isNodeAfter(Node node1, Node node2)
331
332  /**
333   * Use DTMNodeProxy to determine whether two nodes are the same.
334   *
335   * @param node1 The first DOM node to compare.
336   * @param node2 The second DOM node to compare.
337   * @return true if the two nodes are the same.
338   */
339  public static boolean isNodeTheSame(Node node1, Node node2)
340  {
341    if (node1 instanceof DTMNodeProxy && node2 instanceof DTMNodeProxy)
342      return ((DTMNodeProxy)node1).equals((DTMNodeProxy)node2);
343    else
344      return (node1 == node2);
345  }
346
347  /**
348   * Figure out if child2 is after child1 in document order.
349   * <p>
350   * Warning: Some aspects of "document order" are not well defined.
351   * For example, the order of attributes is considered
352   * meaningless in XML, and the order reported by our model will
353   * be consistant for a given invocation but may not
354   * match that of either the source file or the serialized output.
355   *
356   * @param parent Must be the parent of both child1 and child2.
357   * @param child1 Must be the child of parent and not equal to child2.
358   * @param child2 Must be the child of parent and not equal to child1.
359   * @return true if child 2 is after child1 in document order.
360   */
361  private static boolean isNodeAfterSibling(Node parent, Node child1,
362                                            Node child2)
363  {
364
365    boolean isNodeAfterSibling = false;
366    short child1type = child1.getNodeType();
367    short child2type = child2.getNodeType();
368
369    if ((Node.ATTRIBUTE_NODE != child1type)
370            && (Node.ATTRIBUTE_NODE == child2type))
371    {
372
373      // always sort attributes before non-attributes.
374      isNodeAfterSibling = false;
375    }
376    else if ((Node.ATTRIBUTE_NODE == child1type)
377             && (Node.ATTRIBUTE_NODE != child2type))
378    {
379
380      // always sort attributes before non-attributes.
381      isNodeAfterSibling = true;
382    }
383    else if (Node.ATTRIBUTE_NODE == child1type)
384    {
385      NamedNodeMap children = parent.getAttributes();
386      int nNodes = children.getLength();
387      boolean found1 = false, found2 = false;
388
389          // Count from the start until we find one or the other.
390      for (int i = 0; i < nNodes; i++)
391      {
392        Node child = children.item(i);
393
394        if (child1 == child || isNodeTheSame(child1, child))
395        {
396          if (found2)
397          {
398            isNodeAfterSibling = false;
399
400            break;
401          }
402
403          found1 = true;
404        }
405        else if (child2 == child || isNodeTheSame(child2, child))
406        {
407          if (found1)
408          {
409            isNodeAfterSibling = true;
410
411            break;
412          }
413
414          found2 = true;
415        }
416      }
417    }
418    else
419    {
420                // TODO: Check performance of alternate solution:
421                // There are two choices here: Count from the start of
422                // the document until we find one or the other, or count
423                // from one until we find or fail to find the other.
424                // Either can wind up scanning all the siblings in the worst
425                // case, which on a wide document can be a lot of work but
426                // is more typically is a short list.
427                // Scanning from the start involves two tests per iteration,
428                // but it isn't clear that scanning from the middle doesn't
429                // yield more iterations on average.
430                // We should run some testcases.
431      Node child = parent.getFirstChild();
432      boolean found1 = false, found2 = false;
433
434      while (null != child)
435      {
436
437        // Node child = children.item(i);
438        if (child1 == child || isNodeTheSame(child1, child))
439        {
440          if (found2)
441          {
442            isNodeAfterSibling = false;
443
444            break;
445          }
446
447          found1 = true;
448        }
449        else if (child2 == child || isNodeTheSame(child2, child))
450        {
451          if (found1)
452          {
453            isNodeAfterSibling = true;
454
455            break;
456          }
457
458          found2 = true;
459        }
460
461        child = child.getNextSibling();
462      }
463    }
464
465    return isNodeAfterSibling;
466  }  // end isNodeAfterSibling(Node parent, Node child1, Node child2)
467
468  //==========================================================
469  // SECTION: Namespace resolution
470  //==========================================================
471
472  /**
473   * Get the depth level of this node in the tree (equals 1 for
474   * a parentless node).
475   *
476   * @param n Node to be examined.
477   * @return the number of ancestors, plus one
478   * @xsl.usage internal
479   */
480  public short getLevel(Node n)
481  {
482
483    short level = 1;
484
485    while (null != (n = getParentOfNode(n)))
486    {
487      level++;
488    }
489
490    return level;
491  }
492
493  /**
494   * Given an XML Namespace prefix and a context in which the prefix
495   * is to be evaluated, return the Namespace Name this prefix was
496   * bound to. Note that DOM Level 3 is expected to provide a version of
497   * this which deals with the DOM's "early binding" behavior.
498   *
499   * Default handling:
500   *
501   * @param prefix String containing namespace prefix to be resolved,
502   * without the ':' which separates it from the localname when used
503   * in a Node Name. The empty sting signifies the default namespace
504   * at this point in the document.
505   * @param namespaceContext Element which provides context for resolution.
506   * (We could extend this to work for other nodes by first seeking their
507   * nearest Element ancestor.)
508   *
509   * @return a String containing the Namespace URI which this prefix
510   * represents in the specified context.
511   */
512  public String getNamespaceForPrefix(String prefix, Element namespaceContext)
513  {
514
515    int type;
516    Node parent = namespaceContext;
517    String namespace = null;
518
519    if (prefix.equals("xml"))
520    {
521      namespace = QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec
522    }
523        else if(prefix.equals("xmlns"))
524    {
525          // Hardcoded in the DOM spec, expected to be adopted by
526          // Namespace spec. NOTE: Namespace declarations _must_ use
527          // the xmlns: prefix; other prefixes declared as belonging
528          // to this namespace will not be recognized and should
529          // probably be rejected by parsers as erroneous declarations.
530      namespace = "http://www.w3.org/2000/xmlns/";
531    }
532    else
533    {
534          // Attribute name for this prefix's declaration
535          String declname=(prefix=="")
536                        ? "xmlns"
537                        : "xmlns:"+prefix;
538
539          // Scan until we run out of Elements or have resolved the namespace
540      while ((null != parent) && (null == namespace)
541             && (((type = parent.getNodeType()) == Node.ELEMENT_NODE)
542                 || (type == Node.ENTITY_REFERENCE_NODE)))
543      {
544        if (type == Node.ELEMENT_NODE)
545        {
546
547                        // Look for the appropriate Namespace Declaration attribute,
548                        // either "xmlns:prefix" or (if prefix is "") "xmlns".
549                        // TODO: This does not handle "implicit declarations"
550                        // which may be created when the DOM is edited. DOM Level
551                        // 3 will define how those should be interpreted. But
552                        // this issue won't arise in freshly-parsed DOMs.
553
554                // NOTE: declname is set earlier, outside the loop.
555                        Attr attr=((Element)parent).getAttributeNode(declname);
556                        if(attr!=null)
557                        {
558                namespace = attr.getNodeValue();
559                break;
560                        }
561                }
562
563        parent = getParentOfNode(parent);
564      }
565    }
566
567    return namespace;
568  }
569
570  /**
571   * An experiment for the moment.
572   */
573  Hashtable m_NSInfos = new Hashtable();
574
575  /** Object to put into the m_NSInfos table that tells that a node has not been
576   *  processed, but has xmlns namespace decls.  */
577  protected static final NSInfo m_NSInfoUnProcWithXMLNS = new NSInfo(false,
578                                                            true);
579
580  /** Object to put into the m_NSInfos table that tells that a node has not been
581   *  processed, but has no xmlns namespace decls.  */
582  protected static final NSInfo m_NSInfoUnProcWithoutXMLNS = new NSInfo(false,
583                                                               false);
584
585  /** Object to put into the m_NSInfos table that tells that a node has not been
586   *  processed, and has no xmlns namespace decls, and has no ancestor decls.  */
587  protected static final NSInfo m_NSInfoUnProcNoAncestorXMLNS =
588    new NSInfo(false, false, NSInfo.ANCESTORNOXMLNS);
589
590  /** Object to put into the m_NSInfos table that tells that a node has been
591   *  processed, and has xmlns namespace decls.  */
592  protected static final NSInfo m_NSInfoNullWithXMLNS = new NSInfo(true,
593                                                          true);
594
595  /** Object to put into the m_NSInfos table that tells that a node has been
596   *  processed, and has no xmlns namespace decls.  */
597  protected static final NSInfo m_NSInfoNullWithoutXMLNS = new NSInfo(true,
598                                                             false);
599
600  /** Object to put into the m_NSInfos table that tells that a node has been
601   *  processed, and has no xmlns namespace decls. and has no ancestor decls.  */
602  protected static final NSInfo m_NSInfoNullNoAncestorXMLNS =
603    new NSInfo(true, false, NSInfo.ANCESTORNOXMLNS);
604
605  /** Vector of node (odd indexes) and NSInfos (even indexes) that tell if
606   *  the given node is a candidate for ancestor namespace processing.  */
607  protected Vector m_candidateNoAncestorXMLNS = new Vector();
608
609  /**
610   * Returns the namespace of the given node. Differs from simply getting
611   * the node's prefix and using getNamespaceForPrefix in that it attempts
612   * to cache some of the data in NSINFO objects, to avoid repeated lookup.
613   * TODO: Should we consider moving that logic into getNamespaceForPrefix?
614   *
615   * @param n Node to be examined.
616   *
617   * @return String containing the Namespace Name (uri) for this node.
618   * Note that this is undefined for any nodes other than Elements and
619   * Attributes.
620   */
621  public String getNamespaceOfNode(Node n)
622  {
623
624    String namespaceOfPrefix;
625    boolean hasProcessedNS;
626    NSInfo nsInfo;
627    short ntype = n.getNodeType();
628
629    if (Node.ATTRIBUTE_NODE != ntype)
630    {
631      Object nsObj = m_NSInfos.get(n);  // return value
632
633      nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
634      hasProcessedNS = (nsInfo == null) ? false : nsInfo.m_hasProcessedNS;
635    }
636    else
637    {
638      hasProcessedNS = false;
639      nsInfo = null;
640    }
641
642    if (hasProcessedNS)
643    {
644      namespaceOfPrefix = nsInfo.m_namespace;
645    }
646    else
647    {
648      namespaceOfPrefix = null;
649
650      String nodeName = n.getNodeName();
651      int indexOfNSSep = nodeName.indexOf(':');
652      String prefix;
653
654      if (Node.ATTRIBUTE_NODE == ntype)
655      {
656        if (indexOfNSSep > 0)
657        {
658          prefix = nodeName.substring(0, indexOfNSSep);
659        }
660        else
661        {
662
663          // Attributes don't use the default namespace, so if
664          // there isn't a prefix, we're done.
665          return namespaceOfPrefix;
666        }
667      }
668      else
669      {
670        prefix = (indexOfNSSep >= 0)
671                 ? nodeName.substring(0, indexOfNSSep) : "";
672      }
673
674      boolean ancestorsHaveXMLNS = false;
675      boolean nHasXMLNS = false;
676
677      if (prefix.equals("xml"))
678      {
679        namespaceOfPrefix = QName.S_XMLNAMESPACEURI;
680      }
681      else
682      {
683        int parentType;
684        Node parent = n;
685
686        while ((null != parent) && (null == namespaceOfPrefix))
687        {
688          if ((null != nsInfo)
689                  && (nsInfo.m_ancestorHasXMLNSAttrs
690                      == NSInfo.ANCESTORNOXMLNS))
691          {
692            break;
693          }
694
695          parentType = parent.getNodeType();
696
697          if ((null == nsInfo) || nsInfo.m_hasXMLNSAttrs)
698          {
699            boolean elementHasXMLNS = false;
700
701            if (parentType == Node.ELEMENT_NODE)
702            {
703              NamedNodeMap nnm = parent.getAttributes();
704
705              for (int i = 0; i < nnm.getLength(); i++)
706              {
707                Node attr = nnm.item(i);
708                String aname = attr.getNodeName();
709
710                if (aname.charAt(0) == 'x')
711                {
712                  boolean isPrefix = aname.startsWith("xmlns:");
713
714                  if (aname.equals("xmlns") || isPrefix)
715                  {
716                    if (n == parent)
717                      nHasXMLNS = true;
718
719                    elementHasXMLNS = true;
720                    ancestorsHaveXMLNS = true;
721
722                    String p = isPrefix ? aname.substring(6) : "";
723
724                    if (p.equals(prefix))
725                    {
726                      namespaceOfPrefix = attr.getNodeValue();
727
728                      break;
729                    }
730                  }
731                }
732              }
733            }
734
735            if ((Node.ATTRIBUTE_NODE != parentType) && (null == nsInfo)
736                    && (n != parent))
737            {
738              nsInfo = elementHasXMLNS
739                       ? m_NSInfoUnProcWithXMLNS : m_NSInfoUnProcWithoutXMLNS;
740
741              m_NSInfos.put(parent, nsInfo);
742            }
743          }
744
745          if (Node.ATTRIBUTE_NODE == parentType)
746          {
747            parent = getParentOfNode(parent);
748          }
749          else
750          {
751            m_candidateNoAncestorXMLNS.addElement(parent);
752            m_candidateNoAncestorXMLNS.addElement(nsInfo);
753
754            parent = parent.getParentNode();
755          }
756
757          if (null != parent)
758          {
759            Object nsObj = m_NSInfos.get(parent);  // return value
760
761            nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
762          }
763        }
764
765        int nCandidates = m_candidateNoAncestorXMLNS.size();
766
767        if (nCandidates > 0)
768        {
769          if ((false == ancestorsHaveXMLNS) && (null == parent))
770          {
771            for (int i = 0; i < nCandidates; i += 2)
772            {
773              Object candidateInfo = m_candidateNoAncestorXMLNS.elementAt(i
774                                       + 1);
775
776              if (candidateInfo == m_NSInfoUnProcWithoutXMLNS)
777              {
778                m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
779                              m_NSInfoUnProcNoAncestorXMLNS);
780              }
781              else if (candidateInfo == m_NSInfoNullWithoutXMLNS)
782              {
783                m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
784                              m_NSInfoNullNoAncestorXMLNS);
785              }
786            }
787          }
788
789          m_candidateNoAncestorXMLNS.removeAllElements();
790        }
791      }
792
793      if (Node.ATTRIBUTE_NODE != ntype)
794      {
795        if (null == namespaceOfPrefix)
796        {
797          if (ancestorsHaveXMLNS)
798          {
799            if (nHasXMLNS)
800              m_NSInfos.put(n, m_NSInfoNullWithXMLNS);
801            else
802              m_NSInfos.put(n, m_NSInfoNullWithoutXMLNS);
803          }
804          else
805          {
806            m_NSInfos.put(n, m_NSInfoNullNoAncestorXMLNS);
807          }
808        }
809        else
810        {
811          m_NSInfos.put(n, new NSInfo(namespaceOfPrefix, nHasXMLNS));
812        }
813      }
814    }
815
816    return namespaceOfPrefix;
817  }
818
819  /**
820   * Returns the local name of the given node. If the node's name begins
821   * with a namespace prefix, this is the part after the colon; otherwise
822   * it's the full node name.
823   *
824   * @param n the node to be examined.
825   *
826   * @return String containing the Local Name
827   */
828  public String getLocalNameOfNode(Node n)
829  {
830
831    String qname = n.getNodeName();
832    int index = qname.indexOf(':');
833
834    return (index < 0) ? qname : qname.substring(index + 1);
835  }
836
837  /**
838   * Returns the element name with the namespace prefix (if any) replaced
839   * by the Namespace URI it was bound to. This is not a standard
840   * representation of a node name, but it allows convenient
841   * single-string comparison of the "universal" names of two nodes.
842   *
843   * @param elem Element to be examined.
844   *
845   * @return String in the form "namespaceURI:localname" if the node
846   * belongs to a namespace, or simply "localname" if it doesn't.
847   * @see #getExpandedAttributeName
848   */
849  public String getExpandedElementName(Element elem)
850  {
851
852    String namespace = getNamespaceOfNode(elem);
853
854    return (null != namespace)
855           ? namespace + ":" + getLocalNameOfNode(elem)
856           : getLocalNameOfNode(elem);
857  }
858
859  /**
860   * Returns the attribute name with the namespace prefix (if any) replaced
861   * by the Namespace URI it was bound to. This is not a standard
862   * representation of a node name, but it allows convenient
863   * single-string comparison of the "universal" names of two nodes.
864   *
865   * @param attr Attr to be examined
866   *
867   * @return String in the form "namespaceURI:localname" if the node
868   * belongs to a namespace, or simply "localname" if it doesn't.
869   * @see #getExpandedElementName
870   */
871  public String getExpandedAttributeName(Attr attr)
872  {
873
874    String namespace = getNamespaceOfNode(attr);
875
876    return (null != namespace)
877           ? namespace + ":" + getLocalNameOfNode(attr)
878           : getLocalNameOfNode(attr);
879  }
880
881  //==========================================================
882  // SECTION: DOM Helper Functions
883  //==========================================================
884
885  /**
886   * Tell if the node is ignorable whitespace. Note that this can
887   * be determined only in the context of a DTD or other Schema,
888   * and that DOM Level 2 has nostandardized DOM API which can
889   * return that information.
890   * @deprecated
891   *
892   * @param node Node to be examined
893   *
894   * @return CURRENTLY HARDCODED TO FALSE, but should return true if
895   * and only if the node is of type Text, contains only whitespace,
896   * and does not appear as part of the #PCDATA content of an element.
897   * (Note that determining this last may require allowing for
898   * Entity References.)
899   */
900  public boolean isIgnorableWhitespace(Text node)
901  {
902
903    boolean isIgnorable = false;  // return value
904
905    // TODO: I can probably do something to figure out if this
906    // space is ignorable from just the information in
907    // the DOM tree.
908        // -- You need to be able to distinguish whitespace
909        // that is #PCDATA from whitespace that isn't.  That requires
910        // DTD support, which won't be standardized until DOM Level 3.
911    return isIgnorable;
912  }
913
914  /**
915   * Get the first unparented node in the ancestor chain.
916   * @deprecated
917   *
918   * @param node Starting node, to specify which chain to chase
919   *
920   * @return the topmost ancestor.
921   */
922  public Node getRoot(Node node)
923  {
924
925    Node root = null;
926
927    while (node != null)
928    {
929      root = node;
930      node = getParentOfNode(node);
931    }
932
933    return root;
934  }
935
936  /**
937   * Get the root node of the document tree, regardless of
938   * whether or not the node passed in is a document node.
939   * <p>
940   * TODO: This doesn't handle DocumentFragments or "orphaned" subtrees
941   * -- it's currently returning ownerDocument even when the tree is
942   * not actually part of the main Document tree. We should either
943   * rewrite the description to say that it finds the Document node,
944   * or change the code to walk up the ancestor chain.
945
946   *
947   * @param n Node to be examined
948   *
949   * @return the Document node. Note that this is not the correct answer
950   * if n was (or was a child of) a DocumentFragment or an orphaned node,
951   * as can arise if the DOM has been edited rather than being generated
952   * by a parser.
953   */
954  public Node getRootNode(Node n)
955  {
956    int nt = n.getNodeType();
957    return ( (Node.DOCUMENT_NODE == nt) || (Node.DOCUMENT_FRAGMENT_NODE == nt) )
958           ? n : n.getOwnerDocument();
959  }
960
961  /**
962   * Test whether the given node is a namespace decl node. In DOM Level 2
963   * this can be done in a namespace-aware manner, but in Level 1 DOMs
964   * it has to be done by testing the node name.
965   *
966   * @param n Node to be examined.
967   *
968   * @return boolean -- true iff the node is an Attr whose name is
969   * "xmlns" or has the "xmlns:" prefix.
970   */
971  public boolean isNamespaceNode(Node n)
972  {
973
974    if (Node.ATTRIBUTE_NODE == n.getNodeType())
975    {
976      String attrName = n.getNodeName();
977
978      return (attrName.startsWith("xmlns:") || attrName.equals("xmlns"));
979    }
980
981    return false;
982  }
983
984  /**
985   * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs,
986   * parent for other nodes.
987   * <p>
988   * Background: The DOM believes that you must be your Parent's
989   * Child, and thus Attrs don't have parents. XPath said that Attrs
990   * do have their owning Element as their parent. This function
991   * bridges the difference, either by using the DOM Level 2 ownerElement
992   * function or by using a "silly and expensive function" in Level 1
993   * DOMs.
994   * <p>
995   * (There's some discussion of future DOMs generalizing ownerElement
996   * into ownerNode and making it work on all types of nodes. This
997   * still wouldn't help the users of Level 1 or Level 2 DOMs)
998   * <p>
999   *
1000   * @param node Node whose XPath parent we want to obtain
1001   *
1002   * @return the parent of the node, or the ownerElement if it's an
1003   * Attr node, or null if the node is an orphan.
1004   *
1005   * @throws RuntimeException if the Document has no root element.
1006   * This can't arise if the Document was created
1007   * via the DOM Level 2 factory methods, but is possible if other
1008   * mechanisms were used to obtain it
1009   */
1010  public static Node getParentOfNode(Node node) throws RuntimeException
1011  {
1012    Node parent;
1013    short nodeType = node.getNodeType();
1014
1015    if (Node.ATTRIBUTE_NODE == nodeType)
1016    {
1017      Document doc = node.getOwnerDocument();
1018          /*
1019      TBD:
1020      if(null == doc)
1021      {
1022        throw new RuntimeException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT, null));//"Attribute child does not have an owner document!");
1023      }
1024      */
1025
1026          // Given how expensive the tree walk may be, we should first ask
1027          // whether this DOM can answer the question for us. The additional
1028          // test does slow down Level 1 DOMs slightly. DOMHelper2, which
1029          // is currently specialized for Xerces, assumes it can use the
1030          // Level 2 solution. We might want to have an intermediate stage,
1031          // which would assume DOM Level 2 but not assume Xerces.
1032          //
1033          // (Shouldn't have to check whether impl is null in a compliant DOM,
1034          // but let's be paranoid for a moment...)
1035          DOMImplementation impl=doc.getImplementation();
1036          if(impl!=null && impl.hasFeature("Core","2.0"))
1037          {
1038                  parent=((Attr)node).getOwnerElement();
1039                  return parent;
1040          }
1041
1042          // DOM Level 1 solution, as fallback. Hugely expensive.
1043
1044      Element rootElem = doc.getDocumentElement();
1045
1046      if (null == rootElem)
1047      {
1048        throw new RuntimeException(
1049          XMLMessages.createXMLMessage(
1050            XMLErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT,
1051            null));  //"Attribute child does not have an owner document element!");
1052      }
1053
1054      parent = locateAttrParent(rootElem, node);
1055
1056        }
1057    else
1058    {
1059      parent = node.getParentNode();
1060
1061      // if((Node.DOCUMENT_NODE != nodeType) && (null == parent))
1062      // {
1063      //   throw new RuntimeException("Child does not have parent!");
1064      // }
1065    }
1066
1067    return parent;
1068  }
1069
1070  /**
1071   * Given an ID, return the element. This can work only if the document
1072   * is interpreted in the context of a DTD or Schema, since otherwise
1073   * we don't know which attributes are or aren't IDs.
1074   * <p>
1075   * Note that DOM Level 1 had no ability to retrieve this information.
1076   * DOM Level 2 introduced it but does not promise that it will be
1077   * supported in all DOMs; those which can't support it will always
1078   * return null.
1079   * <p>
1080   * TODO: getElementByID is currently unimplemented. Support DOM Level 2?
1081   *
1082   * @param id The unique identifier to be searched for.
1083   * @param doc The document to search within.
1084   * @return CURRENTLY HARDCODED TO NULL, but it should be:
1085   * The node which has this unique identifier, or null if there
1086   * is no such node or this DOM can't reliably recognize it.
1087   */
1088  public Element getElementByID(String id, Document doc)
1089  {
1090    return null;
1091  }
1092
1093  /**
1094   * The getUnparsedEntityURI function returns the URI of the unparsed
1095   * entity with the specified name in the same document as the context
1096   * node (see [3.3 Unparsed Entities]). It returns the empty string if
1097   * there is no such entity.
1098   * <p>
1099   * XML processors may choose to use the System Identifier (if one
1100   * is provided) to resolve the entity, rather than the URI in the
1101   * Public Identifier. The details are dependent on the processor, and
1102   * we would have to support some form of plug-in resolver to handle
1103   * this properly. Currently, we simply return the System Identifier if
1104   * present, and hope that it a usable URI or that our caller can
1105   * map it to one.
1106   * TODO: Resolve Public Identifiers... or consider changing function name.
1107   * <p>
1108   * If we find a relative URI
1109   * reference, XML expects it to be resolved in terms of the base URI
1110   * of the document. The DOM doesn't do that for us, and it isn't
1111   * entirely clear whether that should be done here; currently that's
1112   * pushed up to a higher levelof our application. (Note that DOM Level
1113   * 1 didn't store the document's base URI.)
1114   * TODO: Consider resolving Relative URIs.
1115   * <p>
1116   * (The DOM's statement that "An XML processor may choose to
1117   * completely expand entities before the structure model is passed
1118   * to the DOM" refers only to parsed entities, not unparsed, and hence
1119   * doesn't affect this function.)
1120   *
1121   * @param name A string containing the Entity Name of the unparsed
1122   * entity.
1123   * @param doc Document node for the document to be searched.
1124   *
1125   * @return String containing the URI of the Unparsed Entity, or an
1126   * empty string if no such entity exists.
1127   */
1128  public String getUnparsedEntityURI(String name, Document doc)
1129  {
1130
1131    String url = "";
1132    DocumentType doctype = doc.getDoctype();
1133
1134    if (null != doctype)
1135    {
1136      NamedNodeMap entities = doctype.getEntities();
1137      if(null == entities)
1138        return url;
1139      Entity entity = (Entity) entities.getNamedItem(name);
1140      if(null == entity)
1141        return url;
1142
1143      String notationName = entity.getNotationName();
1144
1145      if (null != notationName)  // then it's unparsed
1146      {
1147        // The draft says: "The XSLT processor may use the public
1148        // identifier to generate a URI for the entity instead of the URI
1149        // specified in the system identifier. If the XSLT processor does
1150        // not use the public identifier to generate the URI, it must use
1151        // the system identifier; if the system identifier is a relative
1152        // URI, it must be resolved into an absolute URI using the URI of
1153        // the resource containing the entity declaration as the base
1154        // URI [RFC2396]."
1155        // So I'm falling a bit short here.
1156        url = entity.getSystemId();
1157
1158        if (null == url)
1159        {
1160          url = entity.getPublicId();
1161        }
1162        else
1163        {
1164          // This should be resolved to an absolute URL, but that's hard
1165          // to do from here.
1166        }
1167      }
1168    }
1169
1170    return url;
1171  }
1172
1173  /**
1174   * Support for getParentOfNode; walks a DOM tree until it finds
1175   * the Element which owns the Attr. This is hugely expensive, and
1176   * if at all possible you should use the DOM Level 2 Attr.ownerElement()
1177   * method instead.
1178   *  <p>
1179   * The DOM Level 1 developers expected that folks would keep track
1180   * of the last Element they'd seen and could recover the info from
1181   * that source. Obviously that doesn't work very well if the only
1182   * information you've been presented with is the Attr. The DOM Level 2
1183   * getOwnerElement() method fixes that, but only for Level 2 and
1184   * later DOMs.
1185   *
1186   * @param elem Element whose subtree is to be searched for this Attr
1187   * @param attr Attr whose owner is to be located.
1188   *
1189   * @return the first Element whose attribute list includes the provided
1190   * attr. In modern DOMs, this will also be the only such Element. (Early
1191   * DOMs had some hope that Attrs might be sharable, but this idea has
1192   * been abandoned.)
1193   */
1194  private static Node locateAttrParent(Element elem, Node attr)
1195  {
1196
1197    Node parent = null;
1198
1199        // This should only be called for Level 1 DOMs, so we don't have to
1200        // worry about namespace issues. In later levels, it's possible
1201        // for a DOM to have two Attrs with the same NodeName but
1202        // different namespaces, and we'd need to get getAttributeNodeNS...
1203        // but later levels also have Attr.getOwnerElement.
1204        Attr check=elem.getAttributeNode(attr.getNodeName());
1205        if(check==attr)
1206                parent = elem;
1207
1208    if (null == parent)
1209    {
1210      for (Node node = elem.getFirstChild(); null != node;
1211              node = node.getNextSibling())
1212      {
1213        if (Node.ELEMENT_NODE == node.getNodeType())
1214        {
1215          parent = locateAttrParent((Element) node, attr);
1216
1217          if (null != parent)
1218            break;
1219        }
1220      }
1221    }
1222
1223    return parent;
1224  }
1225
1226  /**
1227   * The factory object used for creating nodes
1228   * in the result tree.
1229   */
1230  protected Document m_DOMFactory = null;
1231
1232  /**
1233   * Store the factory object required to create DOM nodes
1234   * in the result tree. In fact, that's just the result tree's
1235   * Document node...
1236   *
1237   * @param domFactory The DOM Document Node within whose context
1238   * the result tree will be built.
1239   */
1240  public void setDOMFactory(Document domFactory)
1241  {
1242    this.m_DOMFactory = domFactory;
1243  }
1244
1245  /**
1246   * Retrieve the factory object required to create DOM nodes
1247   * in the result tree.
1248   *
1249   * @return The result tree's DOM Document Node.
1250   */
1251  public Document getDOMFactory()
1252  {
1253
1254    if (null == this.m_DOMFactory)
1255    {
1256      this.m_DOMFactory = createDocument();
1257    }
1258
1259    return this.m_DOMFactory;
1260  }
1261
1262  /**
1263   * Get the textual contents of the node. See
1264   * getNodeData(Node,FastStringBuffer) for discussion of how
1265   * whitespace nodes are handled.
1266   *
1267   * @param node DOM Node to be examined
1268   * @return String containing a concatenation of all the
1269   * textual content within that node.
1270   * @see #getNodeData(Node,FastStringBuffer)
1271   *
1272   */
1273  public static String getNodeData(Node node)
1274  {
1275
1276    FastStringBuffer buf = StringBufferPool.get();
1277    String s;
1278
1279    try
1280    {
1281      getNodeData(node, buf);
1282
1283      s = (buf.length() > 0) ? buf.toString() : "";
1284    }
1285    finally
1286    {
1287      StringBufferPool.free(buf);
1288    }
1289
1290    return s;
1291  }
1292
1293  /**
1294   * Retrieve the text content of a DOM subtree, appending it into a
1295   * user-supplied FastStringBuffer object. Note that attributes are
1296   * not considered part of the content of an element.
1297   * <p>
1298   * There are open questions regarding whitespace stripping.
1299   * Currently we make no special effort in that regard, since the standard
1300   * DOM doesn't yet provide DTD-based information to distinguish
1301   * whitespace-in-element-context from genuine #PCDATA. Note that we
1302   * should probably also consider xml:space if/when we address this.
1303   * DOM Level 3 may solve the problem for us.
1304   *
1305   * @param node Node whose subtree is to be walked, gathering the
1306   * contents of all Text or CDATASection nodes.
1307   * @param buf FastStringBuffer into which the contents of the text
1308   * nodes are to be concatenated.
1309   */
1310  public static void getNodeData(Node node, FastStringBuffer buf)
1311  {
1312
1313    switch (node.getNodeType())
1314    {
1315    case Node.DOCUMENT_FRAGMENT_NODE :
1316    case Node.DOCUMENT_NODE :
1317    case Node.ELEMENT_NODE :
1318    {
1319      for (Node child = node.getFirstChild(); null != child;
1320              child = child.getNextSibling())
1321      {
1322        getNodeData(child, buf);
1323      }
1324    }
1325    break;
1326    case Node.TEXT_NODE :
1327    case Node.CDATA_SECTION_NODE :
1328      buf.append(node.getNodeValue());
1329      break;
1330    case Node.ATTRIBUTE_NODE :
1331      buf.append(node.getNodeValue());
1332      break;
1333    case Node.PROCESSING_INSTRUCTION_NODE :
1334      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1335      break;
1336    default :
1337      // ignore
1338      break;
1339    }
1340  }
1341}
1342