001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $
020 */
021 package org.apache.xml.dtm.ref.sax2dtm;
022
023 import javax.xml.transform.Source;
024
025 import org.apache.xml.dtm.DTM;
026 import org.apache.xml.dtm.DTMManager;
027 import org.apache.xml.dtm.DTMWSFilter;
028 import org.apache.xml.utils.IntStack;
029 import org.apache.xml.utils.IntVector;
030 import org.apache.xml.utils.StringVector;
031 import org.apache.xml.utils.XMLStringFactory;
032
033 import org.xml.sax.SAXException;
034
035 /**
036 * This is a subclass of SAX2DTM which has been modified to meet the needs of
037 * Result Tree Frameworks (RTFs). The differences are:
038 *
039 * 1) Multiple XML trees may be appended to the single DTM. This means
040 * that the root node of each document is _not_ node 0. Some code has
041 * had to be deoptimized to support this mode of operation, and an
042 * explicit mechanism for obtaining the Node Handle of the root node
043 * has been provided.
044 *
045 * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
046 * most recently added trees off the end of the DTM as stylesheet elements
047 * (and thus variable contexts) are exited.
048 *
049 * PLEASE NOTE that this class may be _heavily_ dependent upon the
050 * internals of the SAX2DTM superclass, and must be maintained in
051 * parallel with that code. Arguably, they should be conditionals
052 * within a single class... but they have deen separated for
053 * performance reasons. (In fact, one could even argue about which is
054 * the superclass and which is the subclass; the current arrangement
055 * is as much about preserving stability of existing code during
056 * development as anything else.)
057 *
058 * %REVIEW% In fact, since the differences are so minor, I think it
059 * may be possible/practical to fold them back into the base
060 * SAX2DTM. Consider that as a future code-size optimization.
061 * */
062 public class SAX2RTFDTM extends SAX2DTM
063 {
064 /** Set true to monitor SAX events and similar diagnostic info. */
065 private static final boolean DEBUG = false;
066
067 /** Most recently started Document, or null if the DTM is empty. */
068 private int m_currentDocumentNode=NULL;
069
070 /** Tail-pruning mark: Number of nodes in use */
071 IntStack mark_size=new IntStack();
072 /** Tail-pruning mark: Number of data items in use */
073 IntStack mark_data_size=new IntStack();
074 /** Tail-pruning mark: Number of size-of-data fields in use */
075 IntStack mark_char_size=new IntStack();
076 /** Tail-pruning mark: Number of dataOrQName slots in use */
077 IntStack mark_doq_size=new IntStack();
078 /** Tail-pruning mark: Number of namespace declaration sets in use
079 * %REVIEW% I don't think number of NS sets is ever different from number
080 * of NS elements. We can probabably reduce these to a single stack and save
081 * some storage.
082 * */
083 IntStack mark_nsdeclset_size=new IntStack();
084 /** Tail-pruning mark: Number of naespace declaration elements in use
085 * %REVIEW% I don't think number of NS sets is ever different from number
086 * of NS elements. We can probabably reduce these to a single stack and save
087 * some storage.
088 */
089 IntStack mark_nsdeclelem_size=new IntStack();
090
091 /**
092 * Tail-pruning mark: initial number of nodes in use
093 */
094 int m_emptyNodeCount;
095
096 /**
097 * Tail-pruning mark: initial number of namespace declaration sets
098 */
099 int m_emptyNSDeclSetCount;
100
101 /**
102 * Tail-pruning mark: initial number of namespace declaration elements
103 */
104 int m_emptyNSDeclSetElemsCount;
105
106 /**
107 * Tail-pruning mark: initial number of data items in use
108 */
109 int m_emptyDataCount;
110
111 /**
112 * Tail-pruning mark: initial number of characters in use
113 */
114 int m_emptyCharsCount;
115
116 /**
117 * Tail-pruning mark: default initial number of dataOrQName slots in use
118 */
119 int m_emptyDataQNCount;
120
121 public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
122 DTMWSFilter whiteSpaceFilter,
123 XMLStringFactory xstringfactory,
124 boolean doIndexing)
125 {
126 super(mgr, source, dtmIdentity, whiteSpaceFilter,
127 xstringfactory, doIndexing);
128
129 // NEVER track source locators for RTFs; they aren't meaningful. I think.
130 // (If we did track them, we'd need to tail-prune these too.)
131 //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location;
132 m_useSourceLocationProperty=false;
133 m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
134 : null;
135 m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
136 m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
137
138 // Record initial sizes of fields that are pushed and restored
139 // for RTF tail-pruning. More entries can be popped than pushed, so
140 // we need this to mark the primordial state of the DTM.
141 m_emptyNodeCount = m_size;
142 m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
143 ? 0 : m_namespaceDeclSets.size();
144 m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
145 ? 0 : m_namespaceDeclSetElements.size();
146 m_emptyDataCount = m_data.size();
147 m_emptyCharsCount = m_chars.size();
148 m_emptyDataQNCount = m_dataOrQName.size();
149 }
150
151 /**
152 * Given a DTM, find the owning document node. In the case of
153 * SAX2RTFDTM, which may contain multiple documents, this returns
154 * the <b>most recently started</b> document, or null if the DTM is
155 * empty or no document is currently under construction.
156 *
157 * %REVIEW% Should we continue to report the most recent after
158 * construction has ended? I think not, given that it may have been
159 * tail-pruned.
160 *
161 * @return int Node handle of Document node, or null if this DTM does not
162 * contain an "active" document.
163 * */
164 public int getDocument()
165 {
166 return makeNodeHandle(m_currentDocumentNode);
167 }
168
169 /**
170 * Given a node handle, find the owning document node, using DTM semantics
171 * (Document owns itself) rather than DOM semantics (Document has no owner).
172 *
173 * (I'm counting on the fact that getOwnerDocument() is implemented on top
174 * of this call, in the superclass, to avoid having to rewrite that one.
175 * Be careful if that code changes!)
176 *
177 * @param nodeHandle the id of the node.
178 * @return int Node handle of owning document
179 */
180 public int getDocumentRoot(int nodeHandle)
181 {
182 for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
183 if (_type(id)==DTM.DOCUMENT_NODE) {
184 return makeNodeHandle(id);
185 }
186 }
187
188 return DTM.NULL; // Safety net; should never happen
189 }
190
191 /**
192 * Given a node identifier, find the owning document node. Unlike the DOM,
193 * this considers the owningDocument of a Document to be itself. Note that
194 * in shared DTMs this may not be zero.
195 *
196 * @param nodeIdentifier the id of the starting node.
197 * @return int Node identifier of the root of this DTM tree
198 */
199 protected int _documentRoot(int nodeIdentifier)
200 {
201 if(nodeIdentifier==NULL) return NULL;
202
203 for (int parent=_parent(nodeIdentifier);
204 parent!=NULL;
205 nodeIdentifier=parent,parent=_parent(nodeIdentifier))
206 ;
207
208 return nodeIdentifier;
209 }
210
211 /**
212 * Receive notification of the beginning of a new RTF document.
213 *
214 * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
215 * might want to consider folding the start/endDocument changes back
216 * into the main SAX2DTM so we don't have to expose so many fields
217 * (even as Protected) and carry the additional code.
218 *
219 * @throws SAXException Any SAX exception, possibly
220 * wrapping another exception.
221 * @see org.xml.sax.ContentHandler#startDocument
222 * */
223 public void startDocument() throws SAXException
224 {
225 // Re-initialize the tree append process
226 m_endDocumentOccured = false;
227 m_prefixMappings = new java.util.Vector();
228 m_contextIndexes = new IntStack();
229 m_parents = new IntStack();
230
231 m_currentDocumentNode=m_size;
232 super.startDocument();
233 }
234
235 /**
236 * Receive notification of the end of the document.
237 *
238 * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
239 * might want to consider folding the start/endDocument changes back
240 * into the main SAX2DTM so we don't have to expose so many fields
241 * (even as Protected).
242 *
243 * @throws SAXException Any SAX exception, possibly
244 * wrapping another exception.
245 * @see org.xml.sax.ContentHandler#endDocument
246 * */
247 public void endDocument() throws SAXException
248 {
249 charactersFlush();
250
251 m_nextsib.setElementAt(NULL,m_currentDocumentNode);
252
253 if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
254 m_firstch.setElementAt(NULL,m_currentDocumentNode);
255
256 if (DTM.NULL != m_previous)
257 m_nextsib.setElementAt(DTM.NULL,m_previous);
258
259 m_parents = null;
260 m_prefixMappings = null;
261 m_contextIndexes = null;
262
263 m_currentDocumentNode= NULL; // no longer open
264 m_endDocumentOccured = true;
265 }
266
267
268 /** "Tail-pruning" support for RTFs.
269 *
270 * This function pushes information about the current size of the
271 * DTM's data structures onto a stack, for use by popRewindMark()
272 * (which see).
273 *
274 * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
275 * RTFs will not be indexed, so I can simply panic if that case
276 * arises. Hey, it works...
277 * */
278 public void pushRewindMark()
279 {
280 if(m_indexing || m_elemIndexes!=null)
281 throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
282
283 // Values from DTMDefaultBase
284 // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
285 mark_size.push(m_size);
286 mark_nsdeclset_size.push((m_namespaceDeclSets==null)
287 ? 0
288 : m_namespaceDeclSets.size());
289 mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
290 ? 0
291 : m_namespaceDeclSetElements.size());
292
293 // Values from SAX2DTM
294 mark_data_size.push(m_data.size());
295 mark_char_size.push(m_chars.size());
296 mark_doq_size.push(m_dataOrQName.size());
297 }
298
299 /** "Tail-pruning" support for RTFs.
300 *
301 * This function pops the information previously saved by
302 * pushRewindMark (which see) and uses it to discard all nodes added
303 * to the DTM after that time. We expect that this will allow us to
304 * reuse storage more effectively.
305 *
306 * This is _not_ intended to be called while a document is still being
307 * constructed -- only between endDocument and the next startDocument
308 *
309 * %REVIEW% WARNING: This is the first use of some of the truncation
310 * methods. If Xalan blows up after this is called, that's a likely
311 * place to check.
312 *
313 * %REVIEW% Our original design for DTMs permitted them to share
314 * string pools. If there any risk that this might be happening, we
315 * can _not_ rewind and recover the string storage. One solution
316 * might to assert that DTMs used for RTFs Must Not take advantage
317 * of that feature, but this seems excessively fragile. Another, much
318 * less attractive, would be to just let them leak... Nah.
319 *
320 * @return true if and only if the pop completely emptied the
321 * RTF. That response is used when determining how to unspool
322 * RTF-started-while-RTF-open situations.
323 * */
324 public boolean popRewindMark()
325 {
326 boolean top=mark_size.empty();
327
328 m_size=top ? m_emptyNodeCount : mark_size.pop();
329 m_exptype.setSize(m_size);
330 m_firstch.setSize(m_size);
331 m_nextsib.setSize(m_size);
332 m_prevsib.setSize(m_size);
333 m_parent.setSize(m_size);
334
335 m_elemIndexes=null;
336
337 int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
338 if (m_namespaceDeclSets!=null) {
339 m_namespaceDeclSets.setSize(ds);
340 }
341
342 int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
343 if (m_namespaceDeclSetElements!=null) {
344 m_namespaceDeclSetElements.setSize(ds1);
345 }
346
347 // Values from SAX2DTM - m_data always has a reserved entry
348 m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
349 m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
350 m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
351
352 // Return true iff DTM now empty
353 return m_size==0;
354 }
355
356 /** @return true if a DTM tree is currently under construction.
357 * */
358 public boolean isTreeIncomplete()
359 {
360 return !m_endDocumentOccured;
361 }
362 }