1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: DOM2DTM.java 478671 2006-11-23 21:00:31Z minchau $
20 */
21package org.apache.xml.dtm.ref.dom2dtm;
22
23import java.util.Vector;
24
25import javax.xml.transform.SourceLocator;
26import javax.xml.transform.dom.DOMSource;
27
28import org.apache.xml.dtm.DTM;
29import org.apache.xml.dtm.DTMManager;
30import org.apache.xml.dtm.DTMWSFilter;
31import org.apache.xml.dtm.ref.DTMDefaultBaseIterators;
32import org.apache.xml.dtm.ref.DTMManagerDefault;
33import org.apache.xml.dtm.ref.ExpandedNameTable;
34import org.apache.xml.dtm.ref.IncrementalSAXSource;
35import org.apache.xml.res.XMLErrorResources;
36import org.apache.xml.res.XMLMessages;
37import org.apache.xml.utils.FastStringBuffer;
38import org.apache.xml.utils.QName;
39import org.apache.xml.utils.StringBufferPool;
40import org.apache.xml.utils.TreeWalker;
41import org.apache.xml.utils.XMLCharacterRecognizer;
42import org.apache.xml.utils.XMLString;
43import org.apache.xml.utils.XMLStringFactory;
44import org.w3c.dom.Attr;
45import org.w3c.dom.Document;
46import org.w3c.dom.DocumentType;
47import org.w3c.dom.Element;
48import org.w3c.dom.Entity;
49import org.w3c.dom.NamedNodeMap;
50import org.w3c.dom.Node;
51import org.xml.sax.ContentHandler;
52
53/** The <code>DOM2DTM</code> class serves up a DOM's contents via the
54 * DTM API.
55 *
56 * Note that it doesn't necessarily represent a full Document
57 * tree. You can wrap a DOM2DTM around a specific node and its subtree
58 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
60 * considering.)
61 *
62 * Note too that we do not currently attempt to track document
63 * mutation. If you alter the DOM after wrapping DOM2DTM around it,
64 * all bets are off.
65 * */
66public class DOM2DTM extends DTMDefaultBaseIterators
67{
  /** Debug switch. When true, nextNode() prints diagnostic traces to
   * System.out. */
  static final boolean JJK_DEBUG=false;
  /** Development switch.
   * NOTE(review): not referenced in the part of this file under review --
   * confirm whether anything still depends on it. */
  static final boolean JJK_NEWCODE=true;

  /** Manifest constant: the namespace URI passed along when nextNode()
   * synthesizes the implied declaration of the "xml" prefix.
   */
  static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";

  /** The current position in the DOM tree. Last node examined for
   * possible copying to DTM. */
  transient private Node m_pos;
  /** The current position in the DTM tree: the node that newly added
   * children get appended to. */
  private int m_last_parent=0;
  /** The current position in the DTM tree: the node that the next child
   * added will reference as its previous sibling. */
  private int m_last_kid=NULL;

  /** The top of the subtree.
   * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
   * */
  transient private Node m_root;

  /** True iff the first element has been processed. This is used to control
      synthesis of the implied xml: namespace declaration node. */
  boolean m_processedFirstElement=false;

  /** true if ALL the nodes in the m_root subtree have been processed;
   * false if our incremental build has not yet finished scanning the
   * DOM tree.  */
  transient private boolean m_nodesAreProcessed;

  /** The node objects.  The instance part of the handle indexes
   * directly into this vector.  Each DTM node may actually be
   * composed of several DOM nodes (for example, if logically-adjacent
   * Text/CDATASection nodes in the DOM have been coalesced into a
   * single DTM Text node); this table points only to the first in
   * that sequence. */
  protected Vector m_nodes = new Vector();
105
106  /**
107   * Construct a DOM2DTM object from a DOM node.
108   *
109   * @param mgr The DTMManager who owns this DTM.
110   * @param domSource the DOM source that this DTM will wrap.
111   * @param dtmIdentity The DTM identity ID for this DTM.
112   * @param whiteSpaceFilter The white space filter for this DTM, which may
113   *                         be null.
114   * @param xstringfactory XMLString factory for creating character content.
115   * @param doIndexing true if the caller considers it worth it to use
116   *                   indexing schemes.
117   */
118  public DOM2DTM(DTMManager mgr, DOMSource domSource,
119                 int dtmIdentity, DTMWSFilter whiteSpaceFilter,
120                 XMLStringFactory xstringfactory,
121                 boolean doIndexing)
122  {
123    super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
124          xstringfactory, doIndexing);
125
126    // Initialize DOM navigation
127    m_pos=m_root = domSource.getNode();
128    // Initialize DTM navigation
129    m_last_parent=m_last_kid=NULL;
130    m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
131
132    // Apparently the domSource root may not actually be the
133    // Document node. If it's an Element node, we need to immediately
134    // add its attributes. Adapted from nextNode().
135    // %REVIEW% Move this logic into addNode and recurse? Cleaner!
136    //
137    // (If it's an EntityReference node, we're probably in
138    // seriously bad trouble. For now
139    // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
140		//
141		// %ISSUE% What about inherited namespaces in this case?
142		// Do we need to special-case initialize them into the DTM model?
143    if(ELEMENT_NODE == m_root.getNodeType())
144    {
145      NamedNodeMap attrs=m_root.getAttributes();
146      int attrsize=(attrs==null) ? 0 : attrs.getLength();
147      if(attrsize>0)
148      {
149        int attrIndex=NULL; // start with no previous sib
150        for(int i=0;i<attrsize;++i)
151        {
152          // No need to force nodetype in this case;
153          // addNode() will take care of switching it from
154          // Attr to Namespace if necessary.
155          attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
156          m_firstch.setElementAt(DTM.NULL,attrIndex);
157        }
158        // Terminate list of attrs, and make sure they aren't
159        // considered children of the element
160        m_nextsib.setElementAt(DTM.NULL,attrIndex);
161
162        // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
163      } // if attrs exist
164    } //if(ELEMENT_NODE)
165
166    // Initialize DTM-completed status
167    m_nodesAreProcessed = false;
168  }
169
170  /**
171   * Construct the node map from the node.
172   *
173   * @param node The node that is to be added to the DTM.
174   * @param parentIndex The current parent index.
175   * @param previousSibling The previous sibling index.
176   * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
177   *	Used to force nodes to Text rather than CDATASection when their
178   *	coalesced value includes ordinary Text nodes (current DTM behavior).
179   *
180   * @return The index identity of the node that was added.
181   */
182  protected int addNode(Node node, int parentIndex,
183                        int previousSibling, int forceNodeType)
184  {
185    int nodeIndex = m_nodes.size();
186
187    // Have we overflowed a DTM Identity's addressing range?
188    if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
189    {
190      try
191      {
192        if(m_mgr==null)
193          throw new ClassCastException();
194
195                                // Handle as Extended Addressing
196        DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
197        int id=mgrD.getFirstFreeDTMID();
198        mgrD.addDTM(this,id,nodeIndex);
199        m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
200      }
201      catch(ClassCastException e)
202      {
203        // %REVIEW% Wrong error message, but I've been told we're trying
204        // not to add messages right not for I18N reasons.
205        // %REVIEW% Should this be a Fatal Error?
206        error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
207      }
208    }
209
210    m_size++;
211    // ensureSize(nodeIndex);
212
213    int type;
214    if(NULL==forceNodeType)
215        type = node.getNodeType();
216    else
217        type=forceNodeType;
218
219    // %REVIEW% The Namespace Spec currently says that Namespaces are
220    // processed in a non-namespace-aware manner, by matching the
221    // QName, even though there is in fact a namespace assigned to
222    // these nodes in the DOM. If and when that changes, we will have
223    // to consider whether we check the namespace-for-namespaces
224    // rather than the node name.
225    //
226    // %TBD% Note that the DOM does not necessarily explicitly declare
227    // all the namespaces it uses. DOM Level 3 will introduce a
228    // namespace-normalization operation which reconciles that, and we
229    // can request that users invoke it or otherwise ensure that the
230    // tree is namespace-well-formed before passing the DOM to Xalan.
231    // But if they don't, what should we do about it? We probably
232    // don't want to alter the source DOM (and may not be able to do
233    // so if it's read-only). The best available answer might be to
234    // synthesize additional DTM Namespace Nodes that don't correspond
235    // to DOM Attr Nodes.
236    if (Node.ATTRIBUTE_NODE == type)
237    {
238      String name = node.getNodeName();
239
240      if (name.startsWith("xmlns:") || name.equals("xmlns"))
241      {
242        type = DTM.NAMESPACE_NODE;
243      }
244    }
245
246    m_nodes.addElement(node);
247
248    m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
249    m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
250    m_prevsib.setElementAt(previousSibling,nodeIndex);
251    m_parent.setElementAt(parentIndex,nodeIndex);
252
253    if(DTM.NULL != parentIndex &&
254       type != DTM.ATTRIBUTE_NODE &&
255       type != DTM.NAMESPACE_NODE)
256    {
257      // If the DTM parent had no children, this becomes its first child.
258      if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
259        m_firstch.setElementAt(nodeIndex,parentIndex);
260    }
261
262    String nsURI = node.getNamespaceURI();
263
264    // Deal with the difference between Namespace spec and XSLT
265    // definitions of local name. (The former says PIs don't have
266    // localnames; the latter says they do.)
267    String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ?
268                         node.getNodeName() :
269                         node.getLocalName();
270
271    // Hack to make DOM1 sort of work...
272    if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
273        && null == localName)
274      localName = node.getNodeName(); // -sb
275
276    ExpandedNameTable exnt = m_expandedNameTable;
277
278    // %TBD% Nodes created with the old non-namespace-aware DOM
279    // calls createElement() and createAttribute() will never have a
280    // localname. That will cause their expandedNameID to be just the
281    // nodeType... which will keep them from being matched
282    // successfully by name. Since the DOM makes no promise that
283    // those will participate in namespace processing, this is
284    // officially accepted as Not Our Fault. But it might be nice to
285    // issue a diagnostic message!
286    if(node.getLocalName()==null &&
287       (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
288      {
289        // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
290      }
291
292    int expandedNameID = (null != localName)
293       ? exnt.getExpandedTypeID(nsURI, localName, type) :
294         exnt.getExpandedTypeID(type);
295
296    m_exptype.setElementAt(expandedNameID,nodeIndex);
297
298    indexNode(expandedNameID, nodeIndex);
299
300    if (DTM.NULL != previousSibling)
301      m_nextsib.setElementAt(nodeIndex,previousSibling);
302
303    // This should be done after m_exptype has been set, and probably should
304    // always be the last thing we do
305    if (type == DTM.NAMESPACE_NODE)
306        declareNamespaceInContext(parentIndex,nodeIndex);
307
308    return nodeIndex;
309  }
310
311  /**
312   * Get the number of nodes that have been added.
313   */
314  public int getNumberOfNodes()
315  {
316    return m_nodes.size();
317  }
318
  /**
   * This method iterates to the next node that will be added to the table.
   * Each call to this method examines the next DOM node and normally adds
   * it to the table; a node that is suppressed (stripped whitespace, for
   * example) is skipped without adding anything.
   *
   * @return true if a next node was found, or false if
   *         there are no more nodes.
   */
327  protected boolean nextNode()
328  {
329    // Non-recursive one-fetch-at-a-time depth-first traversal with
330    // attribute/namespace nodes and white-space stripping.
331    // Navigating the DOM is simple, navigating the DTM is simple;
332    // keeping track of both at once is a trifle baroque but at least
333    // we've avoided most of the special cases.
334    if (m_nodesAreProcessed)
335      return false;
336
337    // %REVIEW% Is this local copy Really Useful from a performance
338    // point of view?  Or is this a false microoptimization?
339    Node pos=m_pos;
340    Node next=null;
341    int nexttype=NULL;
342
343    // Navigate DOM tree
344    do
345      {
346        // Look down to first child.
347        if (pos.hasChildNodes())
348          {
349            next = pos.getFirstChild();
350
351            // %REVIEW% There's probably a more elegant way to skip
352            // the doctype. (Just let it go and Suppress it?
353            if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
354              next=next.getNextSibling();
355
356            // Push DTM context -- except for children of Entity References,
357            // which have no DTM equivalent and cause no DTM navigation.
358            if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
359              {
360                m_last_parent=m_last_kid;
361                m_last_kid=NULL;
362                // Whitespace-handler context stacking
363                if(null != m_wsfilter)
364                {
365                  short wsv =
366                    m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
367                  boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
368                    ? getShouldStripWhitespace()
369                    : (DTMWSFilter.STRIP == wsv);
370                  pushShouldStripWhitespace(shouldStrip);
371                } // if(m_wsfilter)
372              }
373          }
374
375        // If that fails, look up and right (but not past root!)
376        else
377          {
378            if(m_last_kid!=NULL)
379              {
380                // Last node posted at this level had no more children
381                // If it has _no_ children, we need to record that.
382                if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
383                  m_firstch.setElementAt(NULL,m_last_kid);
384              }
385
386            while(m_last_parent != NULL)
387              {
388                // %REVIEW% There's probably a more elegant way to
389                // skip the doctype. (Just let it go and Suppress it?
390                next = pos.getNextSibling();
391                if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
392                  next=next.getNextSibling();
393
394                if(next!=null)
395                  break; // Found it!
396
397                // No next-sibling found. Pop the DOM.
398                pos=pos.getParentNode();
399                if(pos==null)
400                  {
401                    // %TBD% Should never arise, but I want to be sure of that...
402                    if(JJK_DEBUG)
403                      {
404                        System.out.println("***** DOM2DTM Pop Control Flow problem");
405                        for(;;); // Freeze right here!
406                      }
407                  }
408
409                // The only parents in the DTM are Elements.  However,
410                // the DOM could contain EntityReferences.  If we
411                // encounter one, pop it _without_ popping DTM.
412                if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
413                  {
414                    // Nothing needs doing
415                    if(JJK_DEBUG)
416                      System.out.println("***** DOM2DTM popping EntRef");
417                  }
418                else
419                  {
420                    popShouldStripWhitespace();
421                    // Fix and pop DTM
422                    if(m_last_kid==NULL)
423                      m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
424                    else
425                      m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
426                    m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
427                  }
428              }
429            if(m_last_parent==NULL)
430              next=null;
431          }
432
433        if(next!=null)
434          nexttype=next.getNodeType();
435
436        // If it's an entity ref, advance past it.
437        //
438        // %REVIEW% Should we let this out the door and just suppress it?
439        // More work, but simpler code, more likely to be correct, and
440        // it doesn't happen very often. We'd get rid of the loop too.
441        if (ENTITY_REFERENCE_NODE == nexttype)
442          pos=next;
443      }
444    while (ENTITY_REFERENCE_NODE == nexttype);
445
446    // Did we run out of the tree?
447    if(next==null)
448      {
449        m_nextsib.setElementAt(NULL,0);
450        m_nodesAreProcessed = true;
451        m_pos=null;
452
453        if(JJK_DEBUG)
454          {
455            System.out.println("***** DOM2DTM Crosscheck:");
456            for(int i=0;i<m_nodes.size();++i)
457              System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
458          }
459
460        return false;
461      }
462
463    // Text needs some special handling:
464    //
465    // DTM may skip whitespace. This is handled by the suppressNode flag, which
466    // when true will keep the DTM node from being created.
467    //
468    // DTM only directly records the first DOM node of any logically-contiguous
469    // sequence. The lastTextNode value will be set to the last node in the
470    // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
471    // advance next over this whole block. Should be simpler than special-casing
472    // the above loop for "Was the logically-preceeding sibling a text node".
473    //
474    // Finally, a DTM node should be considered a CDATASection only if all the
475    // contiguous text it covers is CDATASections. The first Text should
476    // force DTM to Text.
477
478    boolean suppressNode=false;
479    Node lastTextNode=null;
480
481    nexttype=next.getNodeType();
482
483    // nexttype=pos.getNodeType();
484    if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
485      {
486        // If filtering, initially assume we're going to suppress the node
487        suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
488
489        // Scan logically contiguous text (siblings, plus "flattening"
490        // of entity reference boundaries).
491        Node n=next;
492        while(n!=null)
493          {
494            lastTextNode=n;
495            // Any Text node means DTM considers it all Text
496            if(TEXT_NODE == n.getNodeType())
497              nexttype=TEXT_NODE;
498            // Any non-whitespace in this sequence blocks whitespace
499            // suppression
500            suppressNode &=
501              XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
502
503            n=logicalNextDOMTextNode(n);
504          }
505      }
506
507    // Special handling for PIs: Some DOMs represent the XML
508    // Declaration as a PI. This is officially incorrect, per the DOM
509    // spec, but is considered a "wrong but tolerable" temporary
510    // workaround pending proper handling of these fields in DOM Level
511    // 3. We want to recognize and reject that case.
512    else if(PROCESSING_INSTRUCTION_NODE==nexttype)
513      {
514        suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
515      }
516
517
518    if(!suppressNode)
519      {
520        // Inserting next. NOTE that we force the node type; for
521        // coalesced Text, this records CDATASections adjacent to
522        // ordinary Text as Text.
523        int nextindex=addNode(next,m_last_parent,m_last_kid,
524			      nexttype);
525
526        m_last_kid=nextindex;
527
528        if(ELEMENT_NODE == nexttype)
529          {
530            int attrIndex=NULL; // start with no previous sib
531            // Process attributes _now_, rather than waiting.
532            // Simpler control flow, makes NS cache available immediately.
533            NamedNodeMap attrs=next.getAttributes();
534            int attrsize=(attrs==null) ? 0 : attrs.getLength();
535            if(attrsize>0)
536              {
537                for(int i=0;i<attrsize;++i)
538                  {
539                    // No need to force nodetype in this case;
540                    // addNode() will take care of switching it from
541                    // Attr to Namespace if necessary.
542                    attrIndex=addNode(attrs.item(i),
543                                      nextindex,attrIndex,NULL);
544                    m_firstch.setElementAt(DTM.NULL,attrIndex);
545
546                    // If the xml: prefix is explicitly declared
547                    // we don't need to synthesize one.
548		    //
549		    // NOTE that XML Namespaces were not originally
550		    // defined as being namespace-aware (grrr), and
551		    // while the W3C is planning to fix this it's
552		    // safer for now to test the QName and trust the
553		    // parsers to prevent anyone from redefining the
554		    // reserved xmlns: prefix
555                    if(!m_processedFirstElement
556                       && "xmlns:xml".equals(attrs.item(i).getNodeName()))
557                      m_processedFirstElement=true;
558                  }
559                // Terminate list of attrs, and make sure they aren't
560                // considered children of the element
561              } // if attrs exist
562            if(!m_processedFirstElement)
563            {
564              // The DOM might not have an explicit declaration for the
565              // implicit "xml:" prefix, but the XPath data model
566              // requires that this appear as a Namespace Node so we
567              // have to synthesize one. You can think of this as
568              // being a default attribute defined by the XML
569              // Namespaces spec rather than by the DTD.
570              attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
571																	(Element)next,"xml",NAMESPACE_DECL_NS,
572																	makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
573																	),
574                                nextindex,attrIndex,NULL);
575              m_firstch.setElementAt(DTM.NULL,attrIndex);
576              m_processedFirstElement=true;
577            }
578            if(attrIndex!=NULL)
579              m_nextsib.setElementAt(DTM.NULL,attrIndex);
580          } //if(ELEMENT_NODE)
581      } // (if !suppressNode)
582
583    // Text postprocessing: Act on values stored above
584    if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
585      {
586        // %TBD% If nexttype was forced to TEXT, patch the DTM node
587
588        next=lastTextNode;      // Advance the DOM cursor over contiguous text
589      }
590
591    // Remember where we left off.
592    m_pos=next;
593    return true;
594  }
595
596
597  /**
   * Return a DOM node for the given node.
599   *
600   * @param nodeHandle The node ID.
601   *
602   * @return A node representation of the DTM node.
603   */
604  public Node getNode(int nodeHandle)
605  {
606
607    int identity = makeNodeIdentity(nodeHandle);
608
609    return (Node) m_nodes.elementAt(identity);
610  }
611
  /**
   * Get a Node from an identity index.
   *
   * @param nodeIdentity The node identity, used directly as an index
   *                     into the table of node objects.
   *
   * @return The DOM node recorded for that identity.
   */
619  protected Node lookupNode(int nodeIdentity)
620  {
621    return (Node) m_nodes.elementAt(nodeIdentity);
622  }
623
624  /**
625   * Get the next node identity value in the list, and call the iterator
626   * if it hasn't been added yet.
627   *
628   * @param identity The node identity (index).
629   * @return identity+1, or DTM.NULL.
630   */
631  protected int getNextNodeIdentity(int identity)
632  {
633
634    identity += 1;
635
636    if (identity >= m_nodes.size())
637    {
638      if (!nextNode())
639        identity = DTM.NULL;
640    }
641
642    return identity;
643  }
644
645  /**
646   * Get the handle from a Node.
647   * <p>%OPT% This will be pretty slow.</p>
648   *
649   * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
650   * walk down DTM reconstructing path) might be considerably faster
651   * on later nodes in large documents. That might also imply improving
652   * this call to handle nodes which would be in this DTM but
653   * have not yet been built, which might or might not be a Good Thing.</p>
654   *
655   * %REVIEW% This relies on being able to test node-identity via
656   * object-identity. DTM2DOM proxying is a great example of a case where
657   * that doesn't work. DOM Level 3 will provide the isSameNode() method
658   * to fix that, but until then this is going to be flaky.
659   *
660   * @param node A node, which may be null.
661   *
662   * @return The node handle or <code>DTM.NULL</code>.
663   */
664  private int getHandleFromNode(Node node)
665  {
666    if (null != node)
667    {
668      int len = m_nodes.size();
669      boolean isMore;
670      int i = 0;
671      do
672      {
673        for (; i < len; i++)
674        {
675          if (m_nodes.elementAt(i) == node)
676            return makeNodeHandle(i);
677        }
678
679        isMore = nextNode();
680
681        len = m_nodes.size();
682
683      }
684      while(isMore || i < len);
685    }
686
687    return DTM.NULL;
688  }
689
690  /** Get the handle from a Node. This is a more robust version of
691   * getHandleFromNode, intended to be usable by the public.
692   *
693   * <p>%OPT% This will be pretty slow.</p>
694   *
695   * %REVIEW% This relies on being able to test node-identity via
696   * object-identity. DTM2DOM proxying is a great example of a case where
697   * that doesn't work. DOM Level 3 will provide the isSameNode() method
698   * to fix that, but until then this is going to be flaky.
699   *
700   * @param node A node, which may be null.
701   *
702   * @return The node handle or <code>DTM.NULL</code>.  */
703  public int getHandleOfNode(Node node)
704  {
705    if (null != node)
706    {
707      // Is Node actually within the same document? If not, don't search!
708      // This would be easier if m_root was always the Document node, but
709      // we decided to allow wrapping a DTM around a subtree.
710      if((m_root==node) ||
711         (m_root.getNodeType()==DOCUMENT_NODE &&
712          m_root==node.getOwnerDocument()) ||
713         (m_root.getNodeType()!=DOCUMENT_NODE &&
714          m_root.getOwnerDocument()==node.getOwnerDocument())
715         )
716        {
717          // If node _is_ in m_root's tree, find its handle
718          //
719          // %OPT% This check may be improved significantly when DOM
720          // Level 3 nodeKey and relative-order tests become
721          // available!
722          for(Node cursor=node;
723              cursor!=null;
724              cursor=
725                (cursor.getNodeType()!=ATTRIBUTE_NODE)
726                ? cursor.getParentNode()
727                : ((org.w3c.dom.Attr)cursor).getOwnerElement())
728            {
729              if(cursor==m_root)
730                // We know this node; find its handle.
731                return getHandleFromNode(node);
732            } // for ancestors of node
733        } // if node and m_root in same Document
734    } // if node!=null
735
736    return DTM.NULL;
737  }
738
739  /**
   * Retrieves an attribute node by local name and namespace URI.
   *
   * @param nodeHandle int Handle of the node upon which to look up this attribute.
743   * @param namespaceURI The namespace URI of the attribute to
744   *   retrieve, or null.
745   * @param name The local name of the attribute to
746   *   retrieve.
747   * @return The attribute node handle with the specified name (
748   *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
749   *   attribute.
750   */
751  public int getAttributeNode(int nodeHandle, String namespaceURI,
752                              String name)
753  {
754
755    // %OPT% This is probably slower than it needs to be.
756    if (null == namespaceURI)
757      namespaceURI = "";
758
759    int type = getNodeType(nodeHandle);
760
761    if (DTM.ELEMENT_NODE == type)
762    {
763
764      // Assume that attributes immediately follow the element.
765      int identity = makeNodeIdentity(nodeHandle);
766
767      while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
768      {
769        // Assume this can not be null.
770        type = _type(identity);
771
772				// %REVIEW%
773				// Should namespace nodes be retrievable DOM-style as attrs?
774				// If not we need a separate function... which may be desirable
775				// architecturally, but which is ugly from a code point of view.
776				// (If we REALLY insist on it, this code should become a subroutine
777				// of both -- retrieve the node, then test if the type matches
778				// what you're looking for.)
779        if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
780        {
781          Node node = lookupNode(identity);
782          String nodeuri = node.getNamespaceURI();
783
784          if (null == nodeuri)
785            nodeuri = "";
786
787          String nodelocalname = node.getLocalName();
788
789          if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
790            return makeNodeHandle(identity);
791        }
792
793        else // if (DTM.NAMESPACE_NODE != type)
794        {
795          break;
796        }
797      }
798    }
799
800    return DTM.NULL;
801  }
802
803  /**
804   * Get the string-value of a node as a String object
805   * (see http://www.w3.org/TR/xpath#data-model
806   * for the definition of a node's string-value).
807   *
808   * @param nodeHandle The node ID.
809   *
810   * @return A string object that represents the string-value of the given node.
811   */
812  public XMLString getStringValue(int nodeHandle)
813  {
814
815    int type = getNodeType(nodeHandle);
816    Node node = getNode(nodeHandle);
817    // %TBD% If an element only has one text node, we should just use it
818    // directly.
819    if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
820    || DTM.DOCUMENT_FRAGMENT_NODE == type)
821    {
822      FastStringBuffer buf = StringBufferPool.get();
823      String s;
824
825      try
826      {
827        getNodeData(node, buf);
828
829        s = (buf.length() > 0) ? buf.toString() : "";
830      }
831      finally
832      {
833        StringBufferPool.free(buf);
834      }
835
836      return m_xstrf.newstr( s );
837    }
838    else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
839    {
840      // If this is a DTM text node, it may be made of multiple DOM text
841      // nodes -- including navigating into Entity References. DOM2DTM
842      // records the first node in the sequence and requires that we
843      // pick up the others when we retrieve the DTM node's value.
844      //
845      // %REVIEW% DOM Level 3 is expected to add a "whole text"
846      // retrieval method which performs this function for us.
847      FastStringBuffer buf = StringBufferPool.get();
848      while(node!=null)
849      {
850        buf.append(node.getNodeValue());
851        node=logicalNextDOMTextNode(node);
852      }
853      String s=(buf.length() > 0) ? buf.toString() : "";
854      StringBufferPool.free(buf);
855      return m_xstrf.newstr( s );
856    }
857    else
858      return m_xstrf.newstr( node.getNodeValue() );
859  }
860
861  /**
862   * Determine if the string-value of a node is whitespace
863   *
864   * @param nodeHandle The node Handle.
865   *
866   * @return Return true if the given node is whitespace.
867   */
868  public boolean isWhitespace(int nodeHandle)
869  {
870  	int type = getNodeType(nodeHandle);
871    Node node = getNode(nodeHandle);
872  	if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
873    {
874      // If this is a DTM text node, it may be made of multiple DOM text
875      // nodes -- including navigating into Entity References. DOM2DTM
876      // records the first node in the sequence and requires that we
877      // pick up the others when we retrieve the DTM node's value.
878      //
879      // %REVIEW% DOM Level 3 is expected to add a "whole text"
880      // retrieval method which performs this function for us.
881      FastStringBuffer buf = StringBufferPool.get();
882      while(node!=null)
883      {
884        buf.append(node.getNodeValue());
885        node=logicalNextDOMTextNode(node);
886      }
887     boolean b = buf.isWhitespace(0, buf.length());
888      StringBufferPool.free(buf);
889     return b;
890    }
891    return false;
892  }
893
894  /**
895   * Retrieve the text content of a DOM subtree, appending it into a
896   * user-supplied FastStringBuffer object. Note that attributes are
897   * not considered part of the content of an element.
898   * <p>
899   * There are open questions regarding whitespace stripping.
900   * Currently we make no special effort in that regard, since the standard
901   * DOM doesn't yet provide DTD-based information to distinguish
902   * whitespace-in-element-context from genuine #PCDATA. Note that we
903   * should probably also consider xml:space if/when we address this.
904   * DOM Level 3 may solve the problem for us.
905   * <p>
906   * %REVIEW% Actually, since this method operates on the DOM side of the
907   * fence rather than the DTM side, it SHOULDN'T do
908   * any special handling. The DOM does what the DOM does; if you want
909   * DTM-level abstractions, use DTM-level methods.
910   *
911   * @param node Node whose subtree is to be walked, gathering the
912   * contents of all Text or CDATASection nodes.
913   * @param buf FastStringBuffer into which the contents of the text
914   * nodes are to be concatenated.
915   */
916  protected static void getNodeData(Node node, FastStringBuffer buf)
917  {
918
919    switch (node.getNodeType())
920    {
921    case Node.DOCUMENT_FRAGMENT_NODE :
922    case Node.DOCUMENT_NODE :
923    case Node.ELEMENT_NODE :
924    {
925      for (Node child = node.getFirstChild(); null != child;
926              child = child.getNextSibling())
927      {
928        getNodeData(child, buf);
929      }
930    }
931    break;
932    case Node.TEXT_NODE :
933    case Node.CDATA_SECTION_NODE :
934    case Node.ATTRIBUTE_NODE :	// Never a child but might be our starting node
935      buf.append(node.getNodeValue());
936      break;
937    case Node.PROCESSING_INSTRUCTION_NODE :
938      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
939      break;
940    default :
941      // ignore
942      break;
943    }
944  }
945
946  /**
947   * Given a node handle, return its DOM-style node name. This will
948   * include names such as #text or #document.
949   *
950   * @param nodeHandle the id of the node.
951   * @return String Name of this node, which may be an empty string.
952   * %REVIEW% Document when empty string is possible...
953   * %REVIEW-COMMENT% It should never be empty, should it?
954   */
955  public String getNodeName(int nodeHandle)
956  {
957
958    Node node = getNode(nodeHandle);
959
960    // Assume non-null.
961    return node.getNodeName();
962  }
963
964  /**
965   * Given a node handle, return the XPath node name.  This should be
966   * the name as described by the XPath data model, NOT the DOM-style
967   * name.
968   *
969   * @param nodeHandle the id of the node.
970   * @return String Name of this node, which may be an empty string.
971   */
972  public String getNodeNameX(int nodeHandle)
973  {
974
975    String name;
976    short type = getNodeType(nodeHandle);
977
978    switch (type)
979    {
980    case DTM.NAMESPACE_NODE :
981    {
982      Node node = getNode(nodeHandle);
983
984      // assume not null.
985      name = node.getNodeName();
986      if(name.startsWith("xmlns:"))
987      {
988        name = QName.getLocalPart(name);
989      }
990      else if(name.equals("xmlns"))
991      {
992        name = "";
993      }
994    }
995    break;
996    case DTM.ATTRIBUTE_NODE :
997    case DTM.ELEMENT_NODE :
998    case DTM.ENTITY_REFERENCE_NODE :
999    case DTM.PROCESSING_INSTRUCTION_NODE :
1000    {
1001      Node node = getNode(nodeHandle);
1002
1003      // assume not null.
1004      name = node.getNodeName();
1005    }
1006    break;
1007    default :
1008      name = "";
1009    }
1010
1011    return name;
1012  }
1013
1014  /**
1015   * Given a node handle, return its XPath-style localname.
1016   * (As defined in Namespaces, this is the portion of the name after any
1017   * colon character).
1018   *
1019   * @param nodeHandle the id of the node.
1020   * @return String Local name of this node.
1021   */
1022  public String getLocalName(int nodeHandle)
1023  {
1024    if(JJK_NEWCODE)
1025    {
1026      int id=makeNodeIdentity(nodeHandle);
1027      if(NULL==id) return null;
1028      Node newnode=(Node)m_nodes.elementAt(id);
1029      String newname=newnode.getLocalName();
1030      if (null == newname)
1031      {
1032	// XSLT treats PIs, and possibly other things, as having QNames.
1033	String qname = newnode.getNodeName();
1034	if('#'==qname.charAt(0))
1035	{
1036	  //  Match old default for this function
1037	  // This conversion may or may not be necessary
1038	  newname="";
1039	}
1040	else
1041	{
1042	  int index = qname.indexOf(':');
1043	  newname = (index < 0) ? qname : qname.substring(index + 1);
1044	}
1045      }
1046      return newname;
1047    }
1048    else
1049    {
1050      String name;
1051      short type = getNodeType(nodeHandle);
1052      switch (type)
1053      {
1054      case DTM.ATTRIBUTE_NODE :
1055      case DTM.ELEMENT_NODE :
1056      case DTM.ENTITY_REFERENCE_NODE :
1057      case DTM.NAMESPACE_NODE :
1058      case DTM.PROCESSING_INSTRUCTION_NODE :
1059	{
1060	  Node node = getNode(nodeHandle);
1061
1062	  // assume not null.
1063	  name = node.getLocalName();
1064
1065	  if (null == name)
1066	  {
1067	    String qname = node.getNodeName();
1068	    int index = qname.indexOf(':');
1069
1070	    name = (index < 0) ? qname : qname.substring(index + 1);
1071	  }
1072	}
1073	break;
1074      default :
1075	name = "";
1076      }
1077      return name;
1078    }
1079  }
1080
1081  /**
1082   * Given a namespace handle, return the prefix that the namespace decl is
1083   * mapping.
1084   * Given a node handle, return the prefix used to map to the namespace.
1085   *
1086   * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
1087   * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
1088   *
1089   * @param nodeHandle the id of the node.
1090   * @return String prefix of this node's name, or "" if no explicit
1091   * namespace prefix was given.
1092   */
1093  public String getPrefix(int nodeHandle)
1094  {
1095
1096    String prefix;
1097    short type = getNodeType(nodeHandle);
1098
1099    switch (type)
1100    {
1101    case DTM.NAMESPACE_NODE :
1102    {
1103      Node node = getNode(nodeHandle);
1104
1105      // assume not null.
1106      String qname = node.getNodeName();
1107      int index = qname.indexOf(':');
1108
1109      prefix = (index < 0) ? "" : qname.substring(index + 1);
1110    }
1111    break;
1112    case DTM.ATTRIBUTE_NODE :
1113    case DTM.ELEMENT_NODE :
1114    {
1115      Node node = getNode(nodeHandle);
1116
1117      // assume not null.
1118      String qname = node.getNodeName();
1119      int index = qname.indexOf(':');
1120
1121      prefix = (index < 0) ? "" : qname.substring(0, index);
1122    }
1123    break;
1124    default :
1125      prefix = "";
1126    }
1127
1128    return prefix;
1129  }
1130
1131  /**
1132   * Given a node handle, return its DOM-style namespace URI
1133   * (As defined in Namespaces, this is the declared URI which this node's
1134   * prefix -- or default in lieu thereof -- was mapped to.)
1135   *
1136   * <p>%REVIEW% Null or ""? -sb</p>
1137   *
1138   * @param nodeHandle the id of the node.
1139   * @return String URI value of this node's namespace, or null if no
1140   * namespace was resolved.
1141   */
1142  public String getNamespaceURI(int nodeHandle)
1143  {
1144    if(JJK_NEWCODE)
1145    {
1146      int id=makeNodeIdentity(nodeHandle);
1147      if(id==NULL) return null;
1148      Node node=(Node)m_nodes.elementAt(id);
1149      return node.getNamespaceURI();
1150    }
1151    else
1152    {
1153      String nsuri;
1154      short type = getNodeType(nodeHandle);
1155
1156      switch (type)
1157      {
1158      case DTM.ATTRIBUTE_NODE :
1159      case DTM.ELEMENT_NODE :
1160      case DTM.ENTITY_REFERENCE_NODE :
1161      case DTM.NAMESPACE_NODE :
1162      case DTM.PROCESSING_INSTRUCTION_NODE :
1163	{
1164	  Node node = getNode(nodeHandle);
1165
1166	  // assume not null.
1167	  nsuri = node.getNamespaceURI();
1168
1169	  // %TBD% Handle DOM1?
1170	}
1171	break;
1172      default :
1173	nsuri = null;
1174      }
1175
1176      return nsuri;
1177    }
1178
1179  }
1180
1181  /** Utility function: Given a DOM Text node, determine whether it is
1182   * logically followed by another Text or CDATASection node. This may
1183   * involve traversing into Entity References.
1184   *
1185   * %REVIEW% DOM Level 3 is expected to add functionality which may
1186   * allow us to retire this.
1187   */
1188  private Node logicalNextDOMTextNode(Node n)
1189  {
1190        Node p=n.getNextSibling();
1191        if(p==null)
1192        {
1193                // Walk out of any EntityReferenceNodes that ended with text
1194                for(n=n.getParentNode();
1195                        n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1196                        n=n.getParentNode())
1197                {
1198                        p=n.getNextSibling();
1199                        if(p!=null)
1200                                break;
1201                }
1202        }
1203        n=p;
1204        while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1205        {
1206                // Walk into any EntityReferenceNodes that start with text
1207                if(n.hasChildNodes())
1208                        n=n.getFirstChild();
1209                else
1210                        n=n.getNextSibling();
1211        }
1212        if(n!=null)
1213        {
1214                // Found a logical next sibling. Is it text?
1215                int ntype=n.getNodeType();
1216                if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1217                        n=null;
1218        }
1219        return n;
1220  }
1221
1222  /**
1223   * Given a node handle, return its node value. This is mostly
1224   * as defined by the DOM, but may ignore some conveniences.
1225   * <p>
1226   *
1227   * @param nodeHandle The node id.
1228   * @return String Value of this node, or null if not
1229   * meaningful for this node type.
1230   */
1231  public String getNodeValue(int nodeHandle)
1232  {
1233    // The _type(nodeHandle) call was taking the lion's share of our
1234    // time, and was wrong anyway since it wasn't coverting handle to
1235    // identity. Inlined it.
1236    int type = _exptype(makeNodeIdentity(nodeHandle));
1237    type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1238
1239    if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1240      return getNode(nodeHandle).getNodeValue();
1241
1242    // If this is a DTM text node, it may be made of multiple DOM text
1243    // nodes -- including navigating into Entity References. DOM2DTM
1244    // records the first node in the sequence and requires that we
1245    // pick up the others when we retrieve the DTM node's value.
1246    //
1247    // %REVIEW% DOM Level 3 is expected to add a "whole text"
1248    // retrieval method which performs this function for us.
1249    Node node = getNode(nodeHandle);
1250    Node n=logicalNextDOMTextNode(node);
1251    if(n==null)
1252      return node.getNodeValue();
1253
1254    FastStringBuffer buf = StringBufferPool.get();
1255        buf.append(node.getNodeValue());
1256    while(n!=null)
1257    {
1258      buf.append(n.getNodeValue());
1259      n=logicalNextDOMTextNode(n);
1260    }
1261    String s = (buf.length() > 0) ? buf.toString() : "";
1262    StringBufferPool.free(buf);
1263    return s;
1264  }
1265
1266  /**
1267   *   A document type declaration information item has the following properties:
1268   *
1269   *     1. [system identifier] The system identifier of the external subset, if
1270   *        it exists. Otherwise this property has no value.
1271   *
1272   * @return the system identifier String object, or null if there is none.
1273   */
1274  public String getDocumentTypeDeclarationSystemIdentifier()
1275  {
1276
1277    Document doc;
1278
1279    if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1280      doc = (Document) m_root;
1281    else
1282      doc = m_root.getOwnerDocument();
1283
1284    if (null != doc)
1285    {
1286      DocumentType dtd = doc.getDoctype();
1287
1288      if (null != dtd)
1289      {
1290        return dtd.getSystemId();
1291      }
1292    }
1293
1294    return null;
1295  }
1296
1297  /**
1298   * Return the public identifier of the external subset,
1299   * normalized as described in 4.2.2 External Entities [XML]. If there is
1300   * no external subset or if it has no public identifier, this property
1301   * has no value.
1302   *
1303   * @return the public identifier String object, or null if there is none.
1304   */
1305  public String getDocumentTypeDeclarationPublicIdentifier()
1306  {
1307
1308    Document doc;
1309
1310    if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1311      doc = (Document) m_root;
1312    else
1313      doc = m_root.getOwnerDocument();
1314
1315    if (null != doc)
1316    {
1317      DocumentType dtd = doc.getDoctype();
1318
1319      if (null != dtd)
1320      {
1321        return dtd.getPublicId();
1322      }
1323    }
1324
1325    return null;
1326  }
1327
1328  /**
1329   * Returns the <code>Element</code> whose <code>ID</code> is given by
1330   * <code>elementId</code>. If no such element exists, returns
1331   * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1332   * has this <code>ID</code>. Attributes (including those
1333   * with the name "ID") are not of type ID unless so defined by DTD/Schema
1334   * information available to the DTM implementation.
1335   * Implementations that do not know whether attributes are of type ID or
1336   * not are expected to return <code>DTM.NULL</code>.
1337   *
1338   * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1339   * and this operation searches only within a single document, right?
1340   * Wouldn't want collisions between DTMs in the same process.</p>
1341   *
1342   * @param elementId The unique <code>id</code> value for an element.
1343   * @return The handle of the matching element.
1344   */
1345  public int getElementById(String elementId)
1346  {
1347
1348    Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1349        ? (Document) m_root : m_root.getOwnerDocument();
1350
1351    if(null != doc)
1352    {
1353      Node elem = doc.getElementById(elementId);
1354      if(null != elem)
1355      {
1356        int elemHandle = getHandleFromNode(elem);
1357
1358        if(DTM.NULL == elemHandle)
1359        {
1360          int identity = m_nodes.size()-1;
1361          while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1362          {
1363            Node node = getNode(identity);
1364            if(node == elem)
1365            {
1366              elemHandle = getHandleFromNode(elem);
1367              break;
1368            }
1369           }
1370        }
1371
1372        return elemHandle;
1373      }
1374
1375    }
1376    return DTM.NULL;
1377  }
1378
1379  /**
1380   * The getUnparsedEntityURI function returns the URI of the unparsed
1381   * entity with the specified name in the same document as the context
1382   * node (see [3.3 Unparsed Entities]). It returns the empty string if
1383   * there is no such entity.
1384   * <p>
1385   * XML processors may choose to use the System Identifier (if one
1386   * is provided) to resolve the entity, rather than the URI in the
1387   * Public Identifier. The details are dependent on the processor, and
1388   * we would have to support some form of plug-in resolver to handle
1389   * this properly. Currently, we simply return the System Identifier if
1390   * present, and hope that it a usable URI or that our caller can
1391   * map it to one.
1392   * TODO: Resolve Public Identifiers... or consider changing function name.
1393   * <p>
1394   * If we find a relative URI
1395   * reference, XML expects it to be resolved in terms of the base URI
1396   * of the document. The DOM doesn't do that for us, and it isn't
1397   * entirely clear whether that should be done here; currently that's
1398   * pushed up to a higher level of our application. (Note that DOM Level
1399   * 1 didn't store the document's base URI.)
1400   * TODO: Consider resolving Relative URIs.
1401   * <p>
1402   * (The DOM's statement that "An XML processor may choose to
1403   * completely expand entities before the structure model is passed
1404   * to the DOM" refers only to parsed entities, not unparsed, and hence
1405   * doesn't affect this function.)
1406   *
1407   * @param name A string containing the Entity Name of the unparsed
1408   * entity.
1409   *
1410   * @return String containing the URI of the Unparsed Entity, or an
1411   * empty string if no such entity exists.
1412   */
1413  public String getUnparsedEntityURI(String name)
1414  {
1415
1416    String url = "";
1417    Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1418        ? (Document) m_root : m_root.getOwnerDocument();
1419
1420    if (null != doc)
1421    {
1422      DocumentType doctype = doc.getDoctype();
1423
1424      if (null != doctype)
1425      {
1426        NamedNodeMap entities = doctype.getEntities();
1427        if(null == entities)
1428          return url;
1429        Entity entity = (Entity) entities.getNamedItem(name);
1430        if(null == entity)
1431          return url;
1432
1433        String notationName = entity.getNotationName();
1434
1435        if (null != notationName)  // then it's unparsed
1436        {
1437          // The draft says: "The XSLT processor may use the public
1438          // identifier to generate a URI for the entity instead of the URI
1439          // specified in the system identifier. If the XSLT processor does
1440          // not use the public identifier to generate the URI, it must use
1441          // the system identifier; if the system identifier is a relative
1442          // URI, it must be resolved into an absolute URI using the URI of
1443          // the resource containing the entity declaration as the base
1444          // URI [RFC2396]."
1445          // So I'm falling a bit short here.
1446          url = entity.getSystemId();
1447
1448          if (null == url)
1449          {
1450            url = entity.getPublicId();
1451          }
1452          else
1453          {
1454            // This should be resolved to an absolute URL, but that's hard
1455            // to do from here.
1456          }
1457        }
1458      }
1459    }
1460
1461    return url;
1462  }
1463
1464  /**
1465   *     5. [specified] A flag indicating whether this attribute was actually
1466   *        specified in the start-tag of its element, or was defaulted from the
1467   *        DTD.
1468   *
1469   * @param attributeHandle the attribute handle
1470   * @return <code>true</code> if the attribute was specified;
1471   *         <code>false</code> if it was defaulted.
1472   */
1473  public boolean isAttributeSpecified(int attributeHandle)
1474  {
1475    int type = getNodeType(attributeHandle);
1476
1477    if (DTM.ATTRIBUTE_NODE == type)
1478    {
1479      Attr attr = (Attr)getNode(attributeHandle);
1480      return attr.getSpecified();
1481    }
1482    return false;
1483  }
1484
1485  /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1486   * we're wrapped around an existing DOM.
1487   *
1488   * @param source The IncrementalSAXSource that we want to recieve events from
1489   * on demand.
1490   */
1491  public void setIncrementalSAXSource(IncrementalSAXSource source)
1492  {
1493  }
1494
1495  /** getContentHandler returns "our SAX builder" -- the thing that
1496   * someone else should send SAX events to in order to extend this
1497   * DTM model.
1498   *
1499   * @return null if this model doesn't respond to SAX events,
1500   * "this" if the DTM object has a built-in SAX ContentHandler,
1501   * the IncrmentalSAXSource if we're bound to one and should receive
1502   * the SAX stream via it for incremental build purposes...
1503   * */
1504  public org.xml.sax.ContentHandler getContentHandler()
1505  {
1506      return null;
1507  }
1508
1509  /**
1510   * Return this DTM's lexical handler.
1511   *
1512   * %REVIEW% Should this return null if constrution already done/begun?
1513   *
1514   * @return null if this model doesn't respond to lexical SAX events,
1515   * "this" if the DTM object has a built-in SAX ContentHandler,
1516   * the IncrementalSAXSource if we're bound to one and should receive
1517   * the SAX stream via it for incremental build purposes...
1518   */
1519  public org.xml.sax.ext.LexicalHandler getLexicalHandler()
1520  {
1521
1522    return null;
1523  }
1524
1525
1526  /**
1527   * Return this DTM's EntityResolver.
1528   *
1529   * @return null if this model doesn't respond to SAX entity ref events.
1530   */
1531  public org.xml.sax.EntityResolver getEntityResolver()
1532  {
1533
1534    return null;
1535  }
1536
1537  /**
1538   * Return this DTM's DTDHandler.
1539   *
1540   * @return null if this model doesn't respond to SAX dtd events.
1541   */
1542  public org.xml.sax.DTDHandler getDTDHandler()
1543  {
1544
1545    return null;
1546  }
1547
1548  /**
1549   * Return this DTM's ErrorHandler.
1550   *
1551   * @return null if this model doesn't respond to SAX error events.
1552   */
1553  public org.xml.sax.ErrorHandler getErrorHandler()
1554  {
1555
1556    return null;
1557  }
1558
1559  /**
1560   * Return this DTM's DeclHandler.
1561   *
1562   * @return null if this model doesn't respond to SAX Decl events.
1563   */
1564  public org.xml.sax.ext.DeclHandler getDeclHandler()
1565  {
1566
1567    return null;
1568  }
1569
1570  /** @return true iff we're building this model incrementally (eg
1571   * we're partnered with a IncrementalSAXSource) and thus require that the
1572   * transformation and the parse run simultaneously. Guidance to the
1573   * DTMManager.
1574   * */
1575  public boolean needsTwoThreads()
1576  {
1577    return false;
1578  }
1579
1580  // ========== Direct SAX Dispatch, for optimization purposes ========
1581
1582  /**
1583   * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1584   * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1585   * the definition of <CODE>S</CODE></A> for details.
1586   * @param   ch      Character to check as XML whitespace.
1587   * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
1588   */
1589  private static boolean isSpace(char ch)
1590  {
1591    return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
1592  }
1593
1594  /**
1595   * Directly call the
1596   * characters method on the passed ContentHandler for the
1597   * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1598   * for the definition of a node's string-value). Multiple calls to the
1599   * ContentHandler's characters methods may well occur for a single call to
1600   * this method.
1601   *
1602   * @param nodeHandle The node ID.
1603   * @param ch A non-null reference to a ContentHandler.
1604   *
1605   * @throws org.xml.sax.SAXException
1606   */
1607  public void dispatchCharactersEvents(
1608          int nodeHandle, org.xml.sax.ContentHandler ch,
1609          boolean normalize)
1610            throws org.xml.sax.SAXException
1611  {
1612    if(normalize)
1613    {
1614      XMLString str = getStringValue(nodeHandle);
1615      str = str.fixWhiteSpace(true, true, false);
1616      str.dispatchCharactersEvents(ch);
1617    }
1618    else
1619    {
1620      int type = getNodeType(nodeHandle);
1621      Node node = getNode(nodeHandle);
1622      dispatchNodeData(node, ch, 0);
1623          // Text coalition -- a DTM text node may represent multiple
1624          // DOM nodes.
1625          if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1626          {
1627                  while( null != (node=logicalNextDOMTextNode(node)) )
1628                  {
1629                      dispatchNodeData(node, ch, 0);
1630                  }
1631          }
1632    }
1633  }
1634
1635  /**
1636   * Retrieve the text content of a DOM subtree, appending it into a
1637   * user-supplied FastStringBuffer object. Note that attributes are
1638   * not considered part of the content of an element.
1639   * <p>
1640   * There are open questions regarding whitespace stripping.
1641   * Currently we make no special effort in that regard, since the standard
1642   * DOM doesn't yet provide DTD-based information to distinguish
1643   * whitespace-in-element-context from genuine #PCDATA. Note that we
1644   * should probably also consider xml:space if/when we address this.
1645   * DOM Level 3 may solve the problem for us.
1646   * <p>
1647   * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1648   * routine _shouldn't_ perform any processing beyond what the DOM already
1649   * does, and that whitespace stripping and so on belong at the DTM level.
1650   * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1651   *
1652   * @param node Node whose subtree is to be walked, gathering the
1653   * contents of all Text or CDATASection nodes.
1654   */
1655  protected static void dispatchNodeData(Node node,
1656                                         org.xml.sax.ContentHandler ch,
1657                                         int depth)
1658            throws org.xml.sax.SAXException
1659  {
1660
1661    switch (node.getNodeType())
1662    {
1663    case Node.DOCUMENT_FRAGMENT_NODE :
1664    case Node.DOCUMENT_NODE :
1665    case Node.ELEMENT_NODE :
1666    {
1667      for (Node child = node.getFirstChild(); null != child;
1668              child = child.getNextSibling())
1669      {
1670        dispatchNodeData(child, ch, depth+1);
1671      }
1672    }
1673    break;
1674    case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1675    case Node.COMMENT_NODE :
1676      if(0 != depth)
1677        break;
1678        // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1679        // to perform Text Coalition. That should only be done in DTM space.
1680    case Node.TEXT_NODE :
1681    case Node.CDATA_SECTION_NODE :
1682    case Node.ATTRIBUTE_NODE :
1683      String str = node.getNodeValue();
1684      if(ch instanceof CharacterNodeHandler)
1685      {
1686        ((CharacterNodeHandler)ch).characters(node);
1687      }
1688      else
1689      {
1690        ch.characters(str.toCharArray(), 0, str.length());
1691      }
1692      break;
1693//    /* case Node.PROCESSING_INSTRUCTION_NODE :
1694//      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1695//      break; */
1696    default :
1697      // ignore
1698      break;
1699    }
1700  }
1701
1702  TreeWalker m_walker = new TreeWalker(null);
1703
1704  /**
1705   * Directly create SAX parser events from a subtree.
1706   *
1707   * @param nodeHandle The node ID.
1708   * @param ch A non-null reference to a ContentHandler.
1709   *
1710   * @throws org.xml.sax.SAXException
1711   */
1712  public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1713          throws org.xml.sax.SAXException
1714  {
1715    TreeWalker treeWalker = m_walker;
1716    ContentHandler prevCH = treeWalker.getContentHandler();
1717
1718    if(null != prevCH)
1719    {
1720      treeWalker = new TreeWalker(null);
1721    }
1722    treeWalker.setContentHandler(ch);
1723
1724    try
1725    {
1726      Node node = getNode(nodeHandle);
1727      treeWalker.traverseFragment(node);
1728    }
1729    finally
1730    {
1731      treeWalker.setContentHandler(null);
1732    }
1733  }
1734
1735  public interface CharacterNodeHandler
1736  {
1737    public void characters(Node node)
1738            throws org.xml.sax.SAXException;
1739  }
1740
1741  /**
1742   * For the moment all the run time properties are ignored by this
1743   * class.
1744   *
1745   * @param property a <code>String</code> value
1746   * @param value an <code>Object</code> value
1747   */
1748  public void setProperty(String property, Object value)
1749  {
1750  }
1751
1752  /**
1753   * No source information is available for DOM2DTM, so return
1754   * <code>null</code> here.
1755   *
1756   * @param node an <code>int</code> value
1757   * @return null
1758   */
1759  public SourceLocator getSourceLocatorFor(int node)
1760  {
1761    return null;
1762  }
1763
1764}
1765
1766
1767