/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $ */ package org.apache.xml.dtm.ref.sax2dtm; import javax.xml.transform.Source; import org.apache.xml.dtm.DTM; import org.apache.xml.dtm.DTMManager; import org.apache.xml.dtm.DTMWSFilter; import org.apache.xml.utils.IntStack; import org.apache.xml.utils.IntVector; import org.apache.xml.utils.StringVector; import org.apache.xml.utils.XMLStringFactory; import org.xml.sax.SAXException; /** * This is a subclass of SAX2DTM which has been modified to meet the needs of * Result Tree Frameworks (RTFs). The differences are: * * 1) Multiple XML trees may be appended to the single DTM. This means * that the root node of each document is _not_ node 0. Some code has * had to be deoptimized to support this mode of operation, and an * explicit mechanism for obtaining the Node Handle of the root node * has been provided. * * 2) A stack of these documents is maintained, allowing us to "tail-prune" the * most recently added trees off the end of the DTM as stylesheet elements * (and thus variable contexts) are exited. * * PLEASE NOTE that this class may be _heavily_ dependent upon the * internals of the SAX2DTM superclass, and must be maintained in * parallel with that code. Arguably, they should be conditionals * within a single class... but they have deen separated for * performance reasons. (In fact, one could even argue about which is * the superclass and which is the subclass; the current arrangement * is as much about preserving stability of existing code during * development as anything else.) * * %REVIEW% In fact, since the differences are so minor, I think it * may be possible/practical to fold them back into the base * SAX2DTM. Consider that as a future code-size optimization. * */ public class SAX2RTFDTM extends SAX2DTM { /** Set true to monitor SAX events and similar diagnostic info. */ private static final boolean DEBUG = false; /** Most recently started Document, or null if the DTM is empty. */ private int m_currentDocumentNode=NULL; /** Tail-pruning mark: Number of nodes in use */ IntStack mark_size=new IntStack(); /** Tail-pruning mark: Number of data items in use */ IntStack mark_data_size=new IntStack(); /** Tail-pruning mark: Number of size-of-data fields in use */ IntStack mark_char_size=new IntStack(); /** Tail-pruning mark: Number of dataOrQName slots in use */ IntStack mark_doq_size=new IntStack(); /** Tail-pruning mark: Number of namespace declaration sets in use * %REVIEW% I don't think number of NS sets is ever different from number * of NS elements. We can probabably reduce these to a single stack and save * some storage. * */ IntStack mark_nsdeclset_size=new IntStack(); /** Tail-pruning mark: Number of naespace declaration elements in use * %REVIEW% I don't think number of NS sets is ever different from number * of NS elements. We can probabably reduce these to a single stack and save * some storage. */ IntStack mark_nsdeclelem_size=new IntStack(); /** * Tail-pruning mark: initial number of nodes in use */ int m_emptyNodeCount; /** * Tail-pruning mark: initial number of namespace declaration sets */ int m_emptyNSDeclSetCount; /** * Tail-pruning mark: initial number of namespace declaration elements */ int m_emptyNSDeclSetElemsCount; /** * Tail-pruning mark: initial number of data items in use */ int m_emptyDataCount; /** * Tail-pruning mark: initial number of characters in use */ int m_emptyCharsCount; /** * Tail-pruning mark: default initial number of dataOrQName slots in use */ int m_emptyDataQNCount; public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity, DTMWSFilter whiteSpaceFilter, XMLStringFactory xstringfactory, boolean doIndexing) { super(mgr, source, dtmIdentity, whiteSpaceFilter, xstringfactory, doIndexing); // NEVER track source locators for RTFs; they aren't meaningful. I think. // (If we did track them, we'd need to tail-prune these too.) //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location; m_useSourceLocationProperty=false; m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector() : null; m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null; m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null; // Record initial sizes of fields that are pushed and restored // for RTF tail-pruning. More entries can be popped than pushed, so // we need this to mark the primordial state of the DTM. m_emptyNodeCount = m_size; m_emptyNSDeclSetCount = (m_namespaceDeclSets == null) ? 0 : m_namespaceDeclSets.size(); m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null) ? 0 : m_namespaceDeclSetElements.size(); m_emptyDataCount = m_data.size(); m_emptyCharsCount = m_chars.size(); m_emptyDataQNCount = m_dataOrQName.size(); } /** * Given a DTM, find the owning document node. In the case of * SAX2RTFDTM, which may contain multiple documents, this returns * the most recently started document, or null if the DTM is * empty or no document is currently under construction. * * %REVIEW% Should we continue to report the most recent after * construction has ended? I think not, given that it may have been * tail-pruned. * * @return int Node handle of Document node, or null if this DTM does not * contain an "active" document. * */ public int getDocument() { return makeNodeHandle(m_currentDocumentNode); } /** * Given a node handle, find the owning document node, using DTM semantics * (Document owns itself) rather than DOM semantics (Document has no owner). * * (I'm counting on the fact that getOwnerDocument() is implemented on top * of this call, in the superclass, to avoid having to rewrite that one. * Be careful if that code changes!) * * @param nodeHandle the id of the node. * @return int Node handle of owning document */ public int getDocumentRoot(int nodeHandle) { for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) { if (_type(id)==DTM.DOCUMENT_NODE) { return makeNodeHandle(id); } } return DTM.NULL; // Safety net; should never happen } /** * Given a node identifier, find the owning document node. Unlike the DOM, * this considers the owningDocument of a Document to be itself. Note that * in shared DTMs this may not be zero. * * @param nodeIdentifier the id of the starting node. * @return int Node identifier of the root of this DTM tree */ protected int _documentRoot(int nodeIdentifier) { if(nodeIdentifier==NULL) return NULL; for (int parent=_parent(nodeIdentifier); parent!=NULL; nodeIdentifier=parent,parent=_parent(nodeIdentifier)) ; return nodeIdentifier; } /** * Receive notification of the beginning of a new RTF document. * * %REVIEW% Y'know, this isn't all that much of a deoptimization. We * might want to consider folding the start/endDocument changes back * into the main SAX2DTM so we don't have to expose so many fields * (even as Protected) and carry the additional code. * * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @see org.xml.sax.ContentHandler#startDocument * */ public void startDocument() throws SAXException { // Re-initialize the tree append process m_endDocumentOccured = false; m_prefixMappings = new java.util.Vector(); m_contextIndexes = new IntStack(); m_parents = new IntStack(); m_currentDocumentNode=m_size; super.startDocument(); } /** * Receive notification of the end of the document. * * %REVIEW% Y'know, this isn't all that much of a deoptimization. We * might want to consider folding the start/endDocument changes back * into the main SAX2DTM so we don't have to expose so many fields * (even as Protected). * * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @see org.xml.sax.ContentHandler#endDocument * */ public void endDocument() throws SAXException { charactersFlush(); m_nextsib.setElementAt(NULL,m_currentDocumentNode); if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED) m_firstch.setElementAt(NULL,m_currentDocumentNode); if (DTM.NULL != m_previous) m_nextsib.setElementAt(DTM.NULL,m_previous); m_parents = null; m_prefixMappings = null; m_contextIndexes = null; m_currentDocumentNode= NULL; // no longer open m_endDocumentOccured = true; } /** "Tail-pruning" support for RTFs. * * This function pushes information about the current size of the * DTM's data structures onto a stack, for use by popRewindMark() * (which see). * * %REVIEW% I have no idea how to rewind m_elemIndexes. However, * RTFs will not be indexed, so I can simply panic if that case * arises. Hey, it works... * */ public void pushRewindMark() { if(m_indexing || m_elemIndexes!=null) throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM"); // Values from DTMDefaultBase // %REVIEW% Can the namespace stack sizes ever differ? If not, save space! mark_size.push(m_size); mark_nsdeclset_size.push((m_namespaceDeclSets==null) ? 0 : m_namespaceDeclSets.size()); mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null) ? 0 : m_namespaceDeclSetElements.size()); // Values from SAX2DTM mark_data_size.push(m_data.size()); mark_char_size.push(m_chars.size()); mark_doq_size.push(m_dataOrQName.size()); } /** "Tail-pruning" support for RTFs. * * This function pops the information previously saved by * pushRewindMark (which see) and uses it to discard all nodes added * to the DTM after that time. We expect that this will allow us to * reuse storage more effectively. * * This is _not_ intended to be called while a document is still being * constructed -- only between endDocument and the next startDocument * * %REVIEW% WARNING: This is the first use of some of the truncation * methods. If Xalan blows up after this is called, that's a likely * place to check. * * %REVIEW% Our original design for DTMs permitted them to share * string pools. If there any risk that this might be happening, we * can _not_ rewind and recover the string storage. One solution * might to assert that DTMs used for RTFs Must Not take advantage * of that feature, but this seems excessively fragile. Another, much * less attractive, would be to just let them leak... Nah. * * @return true if and only if the pop completely emptied the * RTF. That response is used when determining how to unspool * RTF-started-while-RTF-open situations. * */ public boolean popRewindMark() { boolean top=mark_size.empty(); m_size=top ? m_emptyNodeCount : mark_size.pop(); m_exptype.setSize(m_size); m_firstch.setSize(m_size); m_nextsib.setSize(m_size); m_prevsib.setSize(m_size); m_parent.setSize(m_size); m_elemIndexes=null; int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop(); if (m_namespaceDeclSets!=null) { m_namespaceDeclSets.setSize(ds); } int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop(); if (m_namespaceDeclSetElements!=null) { m_namespaceDeclSetElements.setSize(ds1); } // Values from SAX2DTM - m_data always has a reserved entry m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop()); m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop()); m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop()); // Return true iff DTM now empty return m_size==0; } /** @return true if a DTM tree is currently under construction. * */ public boolean isTreeIncomplete() { return !m_endDocumentOccured; } }