1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $
20 */
21package org.apache.xml.dtm.ref.sax2dtm;
22
23import javax.xml.transform.Source;
24
25import org.apache.xml.dtm.DTM;
26import org.apache.xml.dtm.DTMManager;
27import org.apache.xml.dtm.DTMWSFilter;
28import org.apache.xml.utils.IntStack;
29import org.apache.xml.utils.IntVector;
30import org.apache.xml.utils.StringVector;
31import org.apache.xml.utils.XMLStringFactory;
32
33import org.xml.sax.SAXException;
34
35/**
36 * This is a subclass of SAX2DTM which has been modified to meet the needs of
37 * Result Tree Frameworks (RTFs). The differences are:
38 *
39 * 1) Multiple XML trees may be appended to the single DTM. This means
40 * that the root node of each document is _not_ node 0. Some code has
41 * had to be deoptimized to support this mode of operation, and an
42 * explicit mechanism for obtaining the Node Handle of the root node
43 * has been provided.
44 *
45 * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
46 * most recently added trees off the end of the DTM as stylesheet elements
47 * (and thus variable contexts) are exited.
48 *
49 * PLEASE NOTE that this class may be _heavily_ dependent upon the
50 * internals of the SAX2DTM superclass, and must be maintained in
51 * parallel with that code.  Arguably, they should be conditionals
52 * within a single class... but they have deen separated for
53 * performance reasons. (In fact, one could even argue about which is
54 * the superclass and which is the subclass; the current arrangement
55 * is as much about preserving stability of existing code during
56 * development as anything else.)
57 *
58 * %REVIEW% In fact, since the differences are so minor, I think it
59 * may be possible/practical to fold them back into the base
60 * SAX2DTM. Consider that as a future code-size optimization.
61 * */
62public class SAX2RTFDTM extends SAX2DTM
63{
64  /** Set true to monitor SAX events and similar diagnostic info. */
65  private static final boolean DEBUG = false;
66
67  /** Most recently started Document, or null if the DTM is empty.  */
68  private int m_currentDocumentNode=NULL;
69
70  /** Tail-pruning mark: Number of nodes in use */
71  IntStack mark_size=new IntStack();
72  /** Tail-pruning mark: Number of data items in use */
73  IntStack mark_data_size=new IntStack();
74  /** Tail-pruning mark: Number of size-of-data fields in use */
75  IntStack mark_char_size=new IntStack();
76  /** Tail-pruning mark: Number of dataOrQName slots in use */
77  IntStack mark_doq_size=new IntStack();
78  /** Tail-pruning mark: Number of namespace declaration sets in use
79   * %REVIEW% I don't think number of NS sets is ever different from number
80   * of NS elements. We can probabably reduce these to a single stack and save
81   * some storage.
82   * */
83  IntStack mark_nsdeclset_size=new IntStack();
84  /** Tail-pruning mark: Number of naespace declaration elements in use
85   * %REVIEW% I don't think number of NS sets is ever different from number
86   * of NS elements. We can probabably reduce these to a single stack and save
87   * some storage.
88   */
89  IntStack mark_nsdeclelem_size=new IntStack();
90
91  /**
92   * Tail-pruning mark:  initial number of nodes in use
93   */
94  int m_emptyNodeCount;
95
96  /**
97   * Tail-pruning mark:  initial number of namespace declaration sets
98   */
99  int m_emptyNSDeclSetCount;
100
101  /**
102   * Tail-pruning mark:  initial number of namespace declaration elements
103   */
104  int m_emptyNSDeclSetElemsCount;
105
106  /**
107   * Tail-pruning mark:  initial number of data items in use
108   */
109  int m_emptyDataCount;
110
111  /**
112   * Tail-pruning mark:  initial number of characters in use
113   */
114  int m_emptyCharsCount;
115
116  /**
117   * Tail-pruning mark:  default initial number of dataOrQName slots in use
118   */
119  int m_emptyDataQNCount;
120
121  public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
122                 DTMWSFilter whiteSpaceFilter,
123                 XMLStringFactory xstringfactory,
124                 boolean doIndexing)
125  {
126    super(mgr, source, dtmIdentity, whiteSpaceFilter,
127          xstringfactory, doIndexing);
128
129    // NEVER track source locators for RTFs; they aren't meaningful. I think.
130    // (If we did track them, we'd need to tail-prune these too.)
131    //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location;
132    m_useSourceLocationProperty=false;
133    m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
134                                                     : null;
135    m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
136    m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
137
138    // Record initial sizes of fields that are pushed and restored
139    // for RTF tail-pruning.  More entries can be popped than pushed, so
140    // we need this to mark the primordial state of the DTM.
141    m_emptyNodeCount = m_size;
142    m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
143                                 ? 0 : m_namespaceDeclSets.size();
144    m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
145                                      ? 0 : m_namespaceDeclSetElements.size();
146    m_emptyDataCount = m_data.size();
147    m_emptyCharsCount = m_chars.size();
148    m_emptyDataQNCount = m_dataOrQName.size();
149  }
150
151  /**
152   * Given a DTM, find the owning document node. In the case of
153   * SAX2RTFDTM, which may contain multiple documents, this returns
154   * the <b>most recently started</b> document, or null if the DTM is
155   * empty or no document is currently under construction.
156   *
157   * %REVIEW% Should we continue to report the most recent after
158   * construction has ended? I think not, given that it may have been
159   * tail-pruned.
160   *
161   *  @return int Node handle of Document node, or null if this DTM does not
162   *  contain an "active" document.
163   * */
164  public int getDocument()
165  {
166    return makeNodeHandle(m_currentDocumentNode);
167  }
168
169  /**
170   * Given a node handle, find the owning document node, using DTM semantics
171   * (Document owns itself) rather than DOM semantics (Document has no owner).
172   *
173   * (I'm counting on the fact that getOwnerDocument() is implemented on top
174   * of this call, in the superclass, to avoid having to rewrite that one.
175   * Be careful if that code changes!)
176   *
177   * @param nodeHandle the id of the node.
178   * @return int Node handle of owning document
179   */
180  public int getDocumentRoot(int nodeHandle)
181  {
182    for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
183      if (_type(id)==DTM.DOCUMENT_NODE) {
184        return makeNodeHandle(id);
185      }
186    }
187
188    return DTM.NULL; // Safety net; should never happen
189  }
190
191  /**
192   * Given a node identifier, find the owning document node.  Unlike the DOM,
193   * this considers the owningDocument of a Document to be itself. Note that
194   * in shared DTMs this may not be zero.
195   *
196   * @param nodeIdentifier the id of the starting node.
197   * @return int Node identifier of the root of this DTM tree
198   */
199  protected int _documentRoot(int nodeIdentifier)
200  {
201    if(nodeIdentifier==NULL) return NULL;
202
203    for (int parent=_parent(nodeIdentifier);
204         parent!=NULL;
205         nodeIdentifier=parent,parent=_parent(nodeIdentifier))
206      ;
207
208    return nodeIdentifier;
209  }
210
211  /**
212   * Receive notification of the beginning of a new RTF document.
213   *
214   * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
215   * might want to consider folding the start/endDocument changes back
216   * into the main SAX2DTM so we don't have to expose so many fields
217   * (even as Protected) and carry the additional code.
218   *
219   * @throws SAXException Any SAX exception, possibly
220   *            wrapping another exception.
221   * @see org.xml.sax.ContentHandler#startDocument
222   * */
223  public void startDocument() throws SAXException
224  {
225    // Re-initialize the tree append process
226    m_endDocumentOccured = false;
227    m_prefixMappings = new java.util.Vector();
228    m_contextIndexes = new IntStack();
229    m_parents = new IntStack();
230
231    m_currentDocumentNode=m_size;
232    super.startDocument();
233  }
234
235  /**
236   * Receive notification of the end of the document.
237   *
238   * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
239   * might want to consider folding the start/endDocument changes back
240   * into the main SAX2DTM so we don't have to expose so many fields
241   * (even as Protected).
242   *
243   * @throws SAXException Any SAX exception, possibly
244   *            wrapping another exception.
245   * @see org.xml.sax.ContentHandler#endDocument
246   * */
247  public void endDocument() throws SAXException
248  {
249    charactersFlush();
250
251    m_nextsib.setElementAt(NULL,m_currentDocumentNode);
252
253    if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
254      m_firstch.setElementAt(NULL,m_currentDocumentNode);
255
256    if (DTM.NULL != m_previous)
257      m_nextsib.setElementAt(DTM.NULL,m_previous);
258
259    m_parents = null;
260    m_prefixMappings = null;
261    m_contextIndexes = null;
262
263    m_currentDocumentNode= NULL; // no longer open
264    m_endDocumentOccured = true;
265  }
266
267
268  /** "Tail-pruning" support for RTFs.
269   *
270   * This function pushes information about the current size of the
271   * DTM's data structures onto a stack, for use by popRewindMark()
272   * (which see).
273   *
274   * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
275   * RTFs will not be indexed, so I can simply panic if that case
276   * arises. Hey, it works...
277   * */
278  public void pushRewindMark()
279  {
280    if(m_indexing || m_elemIndexes!=null)
281      throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
282
283    // Values from DTMDefaultBase
284    // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
285    mark_size.push(m_size);
286    mark_nsdeclset_size.push((m_namespaceDeclSets==null)
287                                   ? 0
288                                   : m_namespaceDeclSets.size());
289    mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
290                                   ? 0
291                                   : m_namespaceDeclSetElements.size());
292
293    // Values from SAX2DTM
294    mark_data_size.push(m_data.size());
295    mark_char_size.push(m_chars.size());
296    mark_doq_size.push(m_dataOrQName.size());
297  }
298
299  /** "Tail-pruning" support for RTFs.
300   *
301   * This function pops the information previously saved by
302   * pushRewindMark (which see) and uses it to discard all nodes added
303   * to the DTM after that time. We expect that this will allow us to
304   * reuse storage more effectively.
305   *
306   * This is _not_ intended to be called while a document is still being
307   * constructed -- only between endDocument and the next startDocument
308   *
309   * %REVIEW% WARNING: This is the first use of some of the truncation
310   * methods.  If Xalan blows up after this is called, that's a likely
311   * place to check.
312   *
313   * %REVIEW% Our original design for DTMs permitted them to share
314   * string pools.  If there any risk that this might be happening, we
315   * can _not_ rewind and recover the string storage. One solution
316   * might to assert that DTMs used for RTFs Must Not take advantage
317   * of that feature, but this seems excessively fragile. Another, much
318   * less attractive, would be to just let them leak... Nah.
319   *
320   * @return true if and only if the pop completely emptied the
321   * RTF. That response is used when determining how to unspool
322   * RTF-started-while-RTF-open situations.
323   * */
324  public boolean popRewindMark()
325  {
326    boolean top=mark_size.empty();
327
328    m_size=top ? m_emptyNodeCount : mark_size.pop();
329    m_exptype.setSize(m_size);
330    m_firstch.setSize(m_size);
331    m_nextsib.setSize(m_size);
332    m_prevsib.setSize(m_size);
333    m_parent.setSize(m_size);
334
335    m_elemIndexes=null;
336
337    int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
338    if (m_namespaceDeclSets!=null) {
339      m_namespaceDeclSets.setSize(ds);
340    }
341
342    int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
343    if (m_namespaceDeclSetElements!=null) {
344      m_namespaceDeclSetElements.setSize(ds1);
345    }
346
347    // Values from SAX2DTM - m_data always has a reserved entry
348    m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
349    m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
350    m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
351
352    // Return true iff DTM now empty
353    return m_size==0;
354  }
355
356  /** @return true if a DTM tree is currently under construction.
357   * */
358  public boolean isTreeIncomplete()
359  {
360    return !m_endDocumentOccured;
361  }
362}
363