001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $
020     */
021    package org.apache.xml.dtm.ref.sax2dtm;
022    
023    import javax.xml.transform.Source;
024    
025    import org.apache.xml.dtm.DTM;
026    import org.apache.xml.dtm.DTMManager;
027    import org.apache.xml.dtm.DTMWSFilter;
028    import org.apache.xml.utils.IntStack;
029    import org.apache.xml.utils.IntVector;
030    import org.apache.xml.utils.StringVector;
031    import org.apache.xml.utils.XMLStringFactory;
032    
033    import org.xml.sax.SAXException;
034    
035    /**
036     * This is a subclass of SAX2DTM which has been modified to meet the needs of
037     * Result Tree Frameworks (RTFs). The differences are:
038     *
039     * 1) Multiple XML trees may be appended to the single DTM. This means
040     * that the root node of each document is _not_ node 0. Some code has
041     * had to be deoptimized to support this mode of operation, and an
042     * explicit mechanism for obtaining the Node Handle of the root node
043     * has been provided.
044     *
045     * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
046     * most recently added trees off the end of the DTM as stylesheet elements 
047     * (and thus variable contexts) are exited.
048     *
049     * PLEASE NOTE that this class may be _heavily_ dependent upon the
050     * internals of the SAX2DTM superclass, and must be maintained in
051     * parallel with that code.  Arguably, they should be conditionals
052     * within a single class... but they have deen separated for
053     * performance reasons. (In fact, one could even argue about which is
054     * the superclass and which is the subclass; the current arrangement
055     * is as much about preserving stability of existing code during
056     * development as anything else.)
057     * 
058     * %REVIEW% In fact, since the differences are so minor, I think it
059     * may be possible/practical to fold them back into the base
060     * SAX2DTM. Consider that as a future code-size optimization.
061     * */
062    public class SAX2RTFDTM extends SAX2DTM
063    {
064      /** Set true to monitor SAX events and similar diagnostic info. */
065      private static final boolean DEBUG = false;
066      
067      /** Most recently started Document, or null if the DTM is empty.  */
068      private int m_currentDocumentNode=NULL;
069      
070      /** Tail-pruning mark: Number of nodes in use */
071      IntStack mark_size=new IntStack();
072      /** Tail-pruning mark: Number of data items in use */
073      IntStack mark_data_size=new IntStack();
074      /** Tail-pruning mark: Number of size-of-data fields in use */
075      IntStack mark_char_size=new IntStack();
076      /** Tail-pruning mark: Number of dataOrQName slots in use */
077      IntStack mark_doq_size=new IntStack();
078      /** Tail-pruning mark: Number of namespace declaration sets in use
079       * %REVIEW% I don't think number of NS sets is ever different from number
080       * of NS elements. We can probabably reduce these to a single stack and save
081       * some storage.
082       * */
083      IntStack mark_nsdeclset_size=new IntStack();
084      /** Tail-pruning mark: Number of naespace declaration elements in use
085       * %REVIEW% I don't think number of NS sets is ever different from number
086       * of NS elements. We can probabably reduce these to a single stack and save
087       * some storage.
088       */
089      IntStack mark_nsdeclelem_size=new IntStack();
090    
091      /**
092       * Tail-pruning mark:  initial number of nodes in use
093       */
094      int m_emptyNodeCount;
095    
096      /**
097       * Tail-pruning mark:  initial number of namespace declaration sets
098       */
099      int m_emptyNSDeclSetCount;
100    
101      /**
102       * Tail-pruning mark:  initial number of namespace declaration elements
103       */
104      int m_emptyNSDeclSetElemsCount;
105    
106      /**
107       * Tail-pruning mark:  initial number of data items in use
108       */
109      int m_emptyDataCount;
110    
111      /**
112       * Tail-pruning mark:  initial number of characters in use
113       */
114      int m_emptyCharsCount;
115    
116      /**
117       * Tail-pruning mark:  default initial number of dataOrQName slots in use
118       */
119      int m_emptyDataQNCount;
120      
121      public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
122                     DTMWSFilter whiteSpaceFilter,
123                     XMLStringFactory xstringfactory,
124                     boolean doIndexing)
125      {
126        super(mgr, source, dtmIdentity, whiteSpaceFilter, 
127              xstringfactory, doIndexing);
128              
129        // NEVER track source locators for RTFs; they aren't meaningful. I think.
130        // (If we did track them, we'd need to tail-prune these too.)
131        //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location;
132        m_useSourceLocationProperty=false;
133        m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
134                                                         : null;
135        m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
136        m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
137    
138        // Record initial sizes of fields that are pushed and restored
139        // for RTF tail-pruning.  More entries can be popped than pushed, so
140        // we need this to mark the primordial state of the DTM.
141        m_emptyNodeCount = m_size;
142        m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
143                                     ? 0 : m_namespaceDeclSets.size();
144        m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
145                                          ? 0 : m_namespaceDeclSetElements.size();
146        m_emptyDataCount = m_data.size();
147        m_emptyCharsCount = m_chars.size();
148        m_emptyDataQNCount = m_dataOrQName.size();
149      }
150      
151      /**
152       * Given a DTM, find the owning document node. In the case of
153       * SAX2RTFDTM, which may contain multiple documents, this returns
154       * the <b>most recently started</b> document, or null if the DTM is
155       * empty or no document is currently under construction.
156       *
157       * %REVIEW% Should we continue to report the most recent after
158       * construction has ended? I think not, given that it may have been
159       * tail-pruned.
160       *
161       *  @return int Node handle of Document node, or null if this DTM does not
162       *  contain an "active" document.
163       * */
164      public int getDocument()
165      {
166        return makeNodeHandle(m_currentDocumentNode);
167      }
168    
169      /**
170       * Given a node handle, find the owning document node, using DTM semantics
171       * (Document owns itself) rather than DOM semantics (Document has no owner).
172       *
173       * (I'm counting on the fact that getOwnerDocument() is implemented on top
174       * of this call, in the superclass, to avoid having to rewrite that one.
175       * Be careful if that code changes!)
176       *
177       * @param nodeHandle the id of the node.
178       * @return int Node handle of owning document
179       */
180      public int getDocumentRoot(int nodeHandle)
181      {
182        for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
183          if (_type(id)==DTM.DOCUMENT_NODE) {
184            return makeNodeHandle(id);
185          }
186        }
187    
188        return DTM.NULL; // Safety net; should never happen
189      }
190      
191      /**
192       * Given a node identifier, find the owning document node.  Unlike the DOM,
193       * this considers the owningDocument of a Document to be itself. Note that
194       * in shared DTMs this may not be zero.
195       *
196       * @param nodeIdentifier the id of the starting node.
197       * @return int Node identifier of the root of this DTM tree
198       */
199      protected int _documentRoot(int nodeIdentifier)
200      {
201        if(nodeIdentifier==NULL) return NULL;
202    
203        for (int parent=_parent(nodeIdentifier);
204             parent!=NULL;
205             nodeIdentifier=parent,parent=_parent(nodeIdentifier))
206          ;
207       
208        return nodeIdentifier;
209      }
210    
211      /**
212       * Receive notification of the beginning of a new RTF document.
213       *
214       * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
215       * might want to consider folding the start/endDocument changes back
216       * into the main SAX2DTM so we don't have to expose so many fields
217       * (even as Protected) and carry the additional code.
218       *
219       * @throws SAXException Any SAX exception, possibly
220       *            wrapping another exception.
221       * @see org.xml.sax.ContentHandler#startDocument
222       * */
223      public void startDocument() throws SAXException
224      {
225        // Re-initialize the tree append process
226        m_endDocumentOccured = false;
227        m_prefixMappings = new java.util.Vector();
228        m_contextIndexes = new IntStack();
229        m_parents = new IntStack();
230       
231        m_currentDocumentNode=m_size;
232        super.startDocument();
233      }
234     
235      /**
236       * Receive notification of the end of the document.
237       *
238       * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
239       * might want to consider folding the start/endDocument changes back
240       * into the main SAX2DTM so we don't have to expose so many fields
241       * (even as Protected).
242       *
243       * @throws SAXException Any SAX exception, possibly
244       *            wrapping another exception.
245       * @see org.xml.sax.ContentHandler#endDocument
246       * */
247      public void endDocument() throws SAXException
248      {
249        charactersFlush();
250    
251        m_nextsib.setElementAt(NULL,m_currentDocumentNode);
252    
253        if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
254          m_firstch.setElementAt(NULL,m_currentDocumentNode);
255    
256        if (DTM.NULL != m_previous)
257          m_nextsib.setElementAt(DTM.NULL,m_previous);
258    
259        m_parents = null;
260        m_prefixMappings = null;
261        m_contextIndexes = null;
262    
263        m_currentDocumentNode= NULL; // no longer open
264        m_endDocumentOccured = true;
265      }
266     
267    
268      /** "Tail-pruning" support for RTFs.
269       *
270       * This function pushes information about the current size of the
271       * DTM's data structures onto a stack, for use by popRewindMark()
272       * (which see).
273       *
274       * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
275       * RTFs will not be indexed, so I can simply panic if that case
276       * arises. Hey, it works...
277       * */
278      public void pushRewindMark()
279      {
280        if(m_indexing || m_elemIndexes!=null)
281          throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
282    
283        // Values from DTMDefaultBase
284        // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
285        mark_size.push(m_size);
286        mark_nsdeclset_size.push((m_namespaceDeclSets==null)
287                                       ? 0
288                                       : m_namespaceDeclSets.size());
289        mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
290                                       ? 0
291                                       : m_namespaceDeclSetElements.size());
292       
293        // Values from SAX2DTM
294        mark_data_size.push(m_data.size());
295        mark_char_size.push(m_chars.size());
296        mark_doq_size.push(m_dataOrQName.size());
297      }
298     
299      /** "Tail-pruning" support for RTFs.
300       *
301       * This function pops the information previously saved by
302       * pushRewindMark (which see) and uses it to discard all nodes added
303       * to the DTM after that time. We expect that this will allow us to
304       * reuse storage more effectively.
305       *
306       * This is _not_ intended to be called while a document is still being
307       * constructed -- only between endDocument and the next startDocument
308       *
309       * %REVIEW% WARNING: This is the first use of some of the truncation
310       * methods.  If Xalan blows up after this is called, that's a likely
311       * place to check.
312       *
313       * %REVIEW% Our original design for DTMs permitted them to share
314       * string pools.  If there any risk that this might be happening, we
315       * can _not_ rewind and recover the string storage. One solution
316       * might to assert that DTMs used for RTFs Must Not take advantage
317       * of that feature, but this seems excessively fragile. Another, much
318       * less attractive, would be to just let them leak... Nah.
319       *
320       * @return true if and only if the pop completely emptied the
321       * RTF. That response is used when determining how to unspool
322       * RTF-started-while-RTF-open situations.
323       * */
324      public boolean popRewindMark()
325      {
326        boolean top=mark_size.empty();
327       
328        m_size=top ? m_emptyNodeCount : mark_size.pop();
329        m_exptype.setSize(m_size);
330        m_firstch.setSize(m_size);
331        m_nextsib.setSize(m_size);
332        m_prevsib.setSize(m_size);
333        m_parent.setSize(m_size);
334    
335        m_elemIndexes=null;
336    
337        int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
338        if (m_namespaceDeclSets!=null) {
339          m_namespaceDeclSets.setSize(ds);
340        }
341    
342        int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
343        if (m_namespaceDeclSetElements!=null) {
344          m_namespaceDeclSetElements.setSize(ds1);
345        }
346     
347        // Values from SAX2DTM - m_data always has a reserved entry
348        m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
349        m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
350        m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
351    
352        // Return true iff DTM now empty
353        return m_size==0;
354      }
355     
356      /** @return true if a DTM tree is currently under construction.
357       * */
358      public boolean isTreeIncomplete()
359      {
360        return !m_endDocumentOccured;
361      }
362    }