001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: DOM2DTM.java 478671 2006-11-23 21:00:31Z minchau $
020     */
021    package org.apache.xml.dtm.ref.dom2dtm;
022    
023    import java.util.Vector;
024    
025    import javax.xml.transform.SourceLocator;
026    import javax.xml.transform.dom.DOMSource;
027    
028    import org.apache.xml.dtm.DTM;
029    import org.apache.xml.dtm.DTMManager;
030    import org.apache.xml.dtm.DTMWSFilter;
031    import org.apache.xml.dtm.ref.DTMDefaultBaseIterators;
032    import org.apache.xml.dtm.ref.DTMManagerDefault;
033    import org.apache.xml.dtm.ref.ExpandedNameTable;
034    import org.apache.xml.dtm.ref.IncrementalSAXSource;
035    import org.apache.xml.res.XMLErrorResources;
036    import org.apache.xml.res.XMLMessages;
037    import org.apache.xml.utils.FastStringBuffer;
038    import org.apache.xml.utils.QName;
039    import org.apache.xml.utils.StringBufferPool;
040    import org.apache.xml.utils.TreeWalker;
041    import org.apache.xml.utils.XMLCharacterRecognizer;
042    import org.apache.xml.utils.XMLString;
043    import org.apache.xml.utils.XMLStringFactory;
044    import org.w3c.dom.Attr;
045    import org.w3c.dom.Document;
046    import org.w3c.dom.DocumentType;
047    import org.w3c.dom.Element;
048    import org.w3c.dom.Entity;
049    import org.w3c.dom.NamedNodeMap;
050    import org.w3c.dom.Node;
051    import org.xml.sax.ContentHandler;
052    
053    /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
054     * DTM API.
055     *
056     * Note that it doesn't necessarily represent a full Document
057     * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
061     *
062     * Note too that we do not currently attempt to track document
063     * mutation. If you alter the DOM after wrapping DOM2DTM around it,
064     * all bets are off.
065     * */
066    public class DOM2DTM extends DTMDefaultBaseIterators
067    {
068      static final boolean JJK_DEBUG=false;
069      static final boolean JJK_NEWCODE=true;
070      
071      /** Manefest constant
072       */
073      static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
074      
075      /** The current position in the DOM tree. Last node examined for
076       * possible copying to DTM. */
077      transient private Node m_pos;
078      /** The current position in the DTM tree. Who children get appended to. */
079      private int m_last_parent=0;
080      /** The current position in the DTM tree. Who children reference as their 
081       * previous sib. */
082      private int m_last_kid=NULL;
083    
084      /** The top of the subtree.
085       * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
086       * */
087      transient private Node m_root;
088    
089      /** True iff the first element has been processed. This is used to control
090          synthesis of the implied xml: namespace declaration node. */
091      boolean m_processedFirstElement=false;
092            
093      /** true if ALL the nodes in the m_root subtree have been processed;
094       * false if our incremental build has not yet finished scanning the
095       * DOM tree.  */
096      transient private boolean m_nodesAreProcessed;
097    
098      /** The node objects.  The instance part of the handle indexes
099       * directly into this vector.  Each DTM node may actually be
100       * composed of several DOM nodes (for example, if logically-adjacent
101       * Text/CDATASection nodes in the DOM have been coalesced into a
102       * single DTM Text node); this table points only to the first in
103       * that sequence. */
104      protected Vector m_nodes = new Vector();
105    
106      /**
107       * Construct a DOM2DTM object from a DOM node.
108       *
109       * @param mgr The DTMManager who owns this DTM.
110       * @param domSource the DOM source that this DTM will wrap.
111       * @param dtmIdentity The DTM identity ID for this DTM.
112       * @param whiteSpaceFilter The white space filter for this DTM, which may 
113       *                         be null.
114       * @param xstringfactory XMLString factory for creating character content.
115       * @param doIndexing true if the caller considers it worth it to use 
116       *                   indexing schemes.
117       */
118      public DOM2DTM(DTMManager mgr, DOMSource domSource, 
119                     int dtmIdentity, DTMWSFilter whiteSpaceFilter,
120                     XMLStringFactory xstringfactory,
121                     boolean doIndexing)
122      {
123        super(mgr, domSource, dtmIdentity, whiteSpaceFilter, 
124              xstringfactory, doIndexing);
125    
126        // Initialize DOM navigation
127        m_pos=m_root = domSource.getNode();
128        // Initialize DTM navigation
129        m_last_parent=m_last_kid=NULL;
130        m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
131    
132        // Apparently the domSource root may not actually be the
133        // Document node. If it's an Element node, we need to immediately
134        // add its attributes. Adapted from nextNode().
135        // %REVIEW% Move this logic into addNode and recurse? Cleaner!
136        //
137        // (If it's an EntityReference node, we're probably in 
138        // seriously bad trouble. For now
139        // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
140                    //
141                    // %ISSUE% What about inherited namespaces in this case?
142                    // Do we need to special-case initialize them into the DTM model?
143        if(ELEMENT_NODE == m_root.getNodeType())
144        {
145          NamedNodeMap attrs=m_root.getAttributes();
146          int attrsize=(attrs==null) ? 0 : attrs.getLength();
147          if(attrsize>0)
148          {
149            int attrIndex=NULL; // start with no previous sib
150            for(int i=0;i<attrsize;++i)
151            {
152              // No need to force nodetype in this case;
153              // addNode() will take care of switching it from
154              // Attr to Namespace if necessary.
155              attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
156              m_firstch.setElementAt(DTM.NULL,attrIndex);
157            }
158            // Terminate list of attrs, and make sure they aren't
159            // considered children of the element
160            m_nextsib.setElementAt(DTM.NULL,attrIndex);
161    
162            // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
163          } // if attrs exist
164        } //if(ELEMENT_NODE)
165    
166        // Initialize DTM-completed status 
167        m_nodesAreProcessed = false;
168      }
169    
170      /**
171       * Construct the node map from the node.
172       *
173       * @param node The node that is to be added to the DTM.
174       * @param parentIndex The current parent index.
175       * @param previousSibling The previous sibling index.
176       * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
177       *    Used to force nodes to Text rather than CDATASection when their
178       *    coalesced value includes ordinary Text nodes (current DTM behavior).
179       *
180       * @return The index identity of the node that was added.
181       */
182      protected int addNode(Node node, int parentIndex,
183                            int previousSibling, int forceNodeType)
184      {
185        int nodeIndex = m_nodes.size();
186    
187        // Have we overflowed a DTM Identity's addressing range?
188        if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
189        {
190          try
191          {
192            if(m_mgr==null)
193              throw new ClassCastException();
194                                    
195                                    // Handle as Extended Addressing
196            DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
197            int id=mgrD.getFirstFreeDTMID();
198            mgrD.addDTM(this,id,nodeIndex);
199            m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
200          }
201          catch(ClassCastException e)
202          {
203            // %REVIEW% Wrong error message, but I've been told we're trying
204            // not to add messages right not for I18N reasons.
205            // %REVIEW% Should this be a Fatal Error?
206            error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
207          }
208        }
209    
210        m_size++;
211        // ensureSize(nodeIndex);
212        
213        int type;
214        if(NULL==forceNodeType)
215            type = node.getNodeType();
216        else
217            type=forceNodeType;
218            
219        // %REVIEW% The Namespace Spec currently says that Namespaces are
220        // processed in a non-namespace-aware manner, by matching the
221        // QName, even though there is in fact a namespace assigned to
222        // these nodes in the DOM. If and when that changes, we will have
223        // to consider whether we check the namespace-for-namespaces
224        // rather than the node name.
225        //
226        // %TBD% Note that the DOM does not necessarily explicitly declare
227        // all the namespaces it uses. DOM Level 3 will introduce a
228        // namespace-normalization operation which reconciles that, and we
229        // can request that users invoke it or otherwise ensure that the
230        // tree is namespace-well-formed before passing the DOM to Xalan.
231        // But if they don't, what should we do about it? We probably
232        // don't want to alter the source DOM (and may not be able to do
233        // so if it's read-only). The best available answer might be to
234        // synthesize additional DTM Namespace Nodes that don't correspond
235        // to DOM Attr Nodes.
236        if (Node.ATTRIBUTE_NODE == type)
237        {
238          String name = node.getNodeName();
239    
240          if (name.startsWith("xmlns:") || name.equals("xmlns"))
241          {
242            type = DTM.NAMESPACE_NODE;
243          }
244        }
245        
246        m_nodes.addElement(node);
247        
248        m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
249        m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
250        m_prevsib.setElementAt(previousSibling,nodeIndex);
251        m_parent.setElementAt(parentIndex,nodeIndex);
252        
253        if(DTM.NULL != parentIndex && 
254           type != DTM.ATTRIBUTE_NODE && 
255           type != DTM.NAMESPACE_NODE)
256        {
257          // If the DTM parent had no children, this becomes its first child.
258          if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
259            m_firstch.setElementAt(nodeIndex,parentIndex);
260        }
261        
262        String nsURI = node.getNamespaceURI();
263    
264        // Deal with the difference between Namespace spec and XSLT
265        // definitions of local name. (The former says PIs don't have
266        // localnames; the latter says they do.)
267        String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ? 
268                             node.getNodeName() :
269                             node.getLocalName();
270                             
271        // Hack to make DOM1 sort of work...
272        if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) 
273            && null == localName)
274          localName = node.getNodeName(); // -sb
275          
276        ExpandedNameTable exnt = m_expandedNameTable;
277    
278        // %TBD% Nodes created with the old non-namespace-aware DOM
279        // calls createElement() and createAttribute() will never have a
280        // localname. That will cause their expandedNameID to be just the
281        // nodeType... which will keep them from being matched
282        // successfully by name. Since the DOM makes no promise that
283        // those will participate in namespace processing, this is
284        // officially accepted as Not Our Fault. But it might be nice to
285        // issue a diagnostic message!
286        if(node.getLocalName()==null &&
287           (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
288          {
289            // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
290          }
291        
292        int expandedNameID = (null != localName) 
293           ? exnt.getExpandedTypeID(nsURI, localName, type) :
294             exnt.getExpandedTypeID(type);
295    
296        m_exptype.setElementAt(expandedNameID,nodeIndex);
297        
298        indexNode(expandedNameID, nodeIndex);
299    
300        if (DTM.NULL != previousSibling)
301          m_nextsib.setElementAt(nodeIndex,previousSibling);
302    
303        // This should be done after m_exptype has been set, and probably should
304        // always be the last thing we do
305        if (type == DTM.NAMESPACE_NODE)
306            declareNamespaceInContext(parentIndex,nodeIndex);
307    
308        return nodeIndex;
309      }
310      
311      /**
312       * Get the number of nodes that have been added.
313       */
314      public int getNumberOfNodes()
315      {
316        return m_nodes.size();
317      }
318      
319     /**
320       * This method iterates to the next node that will be added to the table.
321       * Each call to this method adds a new node to the table, unless the end
322       * is reached, in which case it returns null.
323       *
324       * @return The true if a next node is found or false if 
325       *         there are no more nodes.
326       */
327      protected boolean nextNode()
328      {
329        // Non-recursive one-fetch-at-a-time depth-first traversal with 
330        // attribute/namespace nodes and white-space stripping.
331        // Navigating the DOM is simple, navigating the DTM is simple;
332        // keeping track of both at once is a trifle baroque but at least
333        // we've avoided most of the special cases.
334        if (m_nodesAreProcessed)
335          return false;
336            
337        // %REVIEW% Is this local copy Really Useful from a performance
338        // point of view?  Or is this a false microoptimization?
339        Node pos=m_pos; 
340        Node next=null;
341        int nexttype=NULL;
342    
343        // Navigate DOM tree
344        do
345          {
346            // Look down to first child.
347            if (pos.hasChildNodes()) 
348              {
349                next = pos.getFirstChild();
350    
351                // %REVIEW% There's probably a more elegant way to skip
352                // the doctype. (Just let it go and Suppress it?
353                if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
354                  next=next.getNextSibling();
355    
356                // Push DTM context -- except for children of Entity References, 
357                // which have no DTM equivalent and cause no DTM navigation.
358                if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
359                  {
360                    m_last_parent=m_last_kid;
361                    m_last_kid=NULL;
362                    // Whitespace-handler context stacking
363                    if(null != m_wsfilter)
364                    {
365                      short wsv =
366                        m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
367                      boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) 
368                        ? getShouldStripWhitespace() 
369                        : (DTMWSFilter.STRIP == wsv);
370                      pushShouldStripWhitespace(shouldStrip);
371                    } // if(m_wsfilter)
372                  }
373              }
374    
375            // If that fails, look up and right (but not past root!)
376            else 
377              {
378                if(m_last_kid!=NULL)
379                  {
380                    // Last node posted at this level had no more children
381                    // If it has _no_ children, we need to record that.
382                    if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
383                      m_firstch.setElementAt(NULL,m_last_kid);
384                  }
385                            
386                while(m_last_parent != NULL)
387                  {
388                    // %REVIEW% There's probably a more elegant way to
389                    // skip the doctype. (Just let it go and Suppress it?
390                    next = pos.getNextSibling();
391                    if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
392                      next=next.getNextSibling();
393    
394                    if(next!=null)
395                      break; // Found it!
396                    
397                    // No next-sibling found. Pop the DOM.
398                    pos=pos.getParentNode();
399                    if(pos==null)
400                      {
401                        // %TBD% Should never arise, but I want to be sure of that...
402                        if(JJK_DEBUG)
403                          {
404                            System.out.println("***** DOM2DTM Pop Control Flow problem");
405                            for(;;); // Freeze right here!
406                          }
407                      }
408                    
409                    // The only parents in the DTM are Elements.  However,
410                    // the DOM could contain EntityReferences.  If we
411                    // encounter one, pop it _without_ popping DTM.
412                    if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
413                      {
414                        // Nothing needs doing
415                        if(JJK_DEBUG)
416                          System.out.println("***** DOM2DTM popping EntRef");
417                      }
418                    else
419                      {
420                        popShouldStripWhitespace();
421                        // Fix and pop DTM
422                        if(m_last_kid==NULL)
423                          m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
424                        else
425                          m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
426                        m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
427                      }
428                  }
429                if(m_last_parent==NULL)
430                  next=null;
431              }
432                    
433            if(next!=null)
434              nexttype=next.getNodeType();
435                    
436            // If it's an entity ref, advance past it.
437            //
438            // %REVIEW% Should we let this out the door and just suppress it?
439            // More work, but simpler code, more likely to be correct, and
440            // it doesn't happen very often. We'd get rid of the loop too.
441            if (ENTITY_REFERENCE_NODE == nexttype)
442              pos=next;
443          }
444        while (ENTITY_REFERENCE_NODE == nexttype); 
445            
446        // Did we run out of the tree?
447        if(next==null)
448          {
449            m_nextsib.setElementAt(NULL,0);
450            m_nodesAreProcessed = true;
451            m_pos=null;
452                    
453            if(JJK_DEBUG)
454              {
455                System.out.println("***** DOM2DTM Crosscheck:");
456                for(int i=0;i<m_nodes.size();++i)
457                  System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
458              }
459                    
460            return false;
461          }
462    
463        // Text needs some special handling:
464        //
465        // DTM may skip whitespace. This is handled by the suppressNode flag, which
466        // when true will keep the DTM node from being created.
467        //
468        // DTM only directly records the first DOM node of any logically-contiguous
469        // sequence. The lastTextNode value will be set to the last node in the 
470        // contiguous sequence, and -- AFTER the DTM addNode -- can be used to 
471        // advance next over this whole block. Should be simpler than special-casing
472        // the above loop for "Was the logically-preceeding sibling a text node".
473        // 
474        // Finally, a DTM node should be considered a CDATASection only if all the
475        // contiguous text it covers is CDATASections. The first Text should
476        // force DTM to Text.
477            
478        boolean suppressNode=false;
479        Node lastTextNode=null;
480    
481        nexttype=next.getNodeType();
482            
483        // nexttype=pos.getNodeType();
484        if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
485          {
486            // If filtering, initially assume we're going to suppress the node
487            suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
488    
489            // Scan logically contiguous text (siblings, plus "flattening"
490            // of entity reference boundaries).
491            Node n=next;
492            while(n!=null)
493              {
494                lastTextNode=n;
495                // Any Text node means DTM considers it all Text
496                if(TEXT_NODE == n.getNodeType())
497                  nexttype=TEXT_NODE;
498                // Any non-whitespace in this sequence blocks whitespace
499                // suppression
500                suppressNode &=
501                  XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
502                            
503                n=logicalNextDOMTextNode(n);
504              }
505          }
506            
507        // Special handling for PIs: Some DOMs represent the XML
508        // Declaration as a PI. This is officially incorrect, per the DOM
509        // spec, but is considered a "wrong but tolerable" temporary
510        // workaround pending proper handling of these fields in DOM Level
511        // 3. We want to recognize and reject that case.
512        else if(PROCESSING_INSTRUCTION_NODE==nexttype)
513          {
514            suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
515          }
516            
517            
518        if(!suppressNode)
519          {
520            // Inserting next. NOTE that we force the node type; for
521            // coalesced Text, this records CDATASections adjacent to
522            // ordinary Text as Text.
523            int nextindex=addNode(next,m_last_parent,m_last_kid,
524                                  nexttype);
525            
526            m_last_kid=nextindex;
527    
528            if(ELEMENT_NODE == nexttype)
529              {
530                int attrIndex=NULL; // start with no previous sib
531                // Process attributes _now_, rather than waiting.
532                // Simpler control flow, makes NS cache available immediately.
533                NamedNodeMap attrs=next.getAttributes();
534                int attrsize=(attrs==null) ? 0 : attrs.getLength();
535                if(attrsize>0)
536                  {
537                    for(int i=0;i<attrsize;++i)
538                      {
539                        // No need to force nodetype in this case;
540                        // addNode() will take care of switching it from
541                        // Attr to Namespace if necessary.
542                        attrIndex=addNode(attrs.item(i),
543                                          nextindex,attrIndex,NULL);
544                        m_firstch.setElementAt(DTM.NULL,attrIndex);
545    
546                        // If the xml: prefix is explicitly declared
547                        // we don't need to synthesize one.
548                        //
549                        // NOTE that XML Namespaces were not originally
550                        // defined as being namespace-aware (grrr), and
551                        // while the W3C is planning to fix this it's
552                        // safer for now to test the QName and trust the
553                        // parsers to prevent anyone from redefining the
554                        // reserved xmlns: prefix
555                        if(!m_processedFirstElement
556                           && "xmlns:xml".equals(attrs.item(i).getNodeName()))
557                          m_processedFirstElement=true; 
558                      }
559                    // Terminate list of attrs, and make sure they aren't
560                    // considered children of the element
561                  } // if attrs exist
562                if(!m_processedFirstElement)
563                {
564                  // The DOM might not have an explicit declaration for the
565                  // implicit "xml:" prefix, but the XPath data model
566                  // requires that this appear as a Namespace Node so we
567                  // have to synthesize one. You can think of this as
568                  // being a default attribute defined by the XML
569                  // Namespaces spec rather than by the DTD.
570                  attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
571                                                                                                                                            (Element)next,"xml",NAMESPACE_DECL_NS,
572                                                                                                                                            makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
573                                                                                                                                            ),
574                                    nextindex,attrIndex,NULL);      
575                  m_firstch.setElementAt(DTM.NULL,attrIndex);
576                  m_processedFirstElement=true;
577                }
578                if(attrIndex!=NULL)
579                  m_nextsib.setElementAt(DTM.NULL,attrIndex);
580              } //if(ELEMENT_NODE)
581          } // (if !suppressNode)
582    
583        // Text postprocessing: Act on values stored above
584        if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
585          {
586            // %TBD% If nexttype was forced to TEXT, patch the DTM node
587                    
588            next=lastTextNode;      // Advance the DOM cursor over contiguous text
589          }
590            
591        // Remember where we left off.
592        m_pos=next;
593        return true;
594      }  
595    
596    
597      /**
598       * Return an DOM node for the given node.
599       *
600       * @param nodeHandle The node ID.
601       *
602       * @return A node representation of the DTM node.
603       */
604      public Node getNode(int nodeHandle)
605      {
606    
607        int identity = makeNodeIdentity(nodeHandle);
608    
609        return (Node) m_nodes.elementAt(identity);
610      }
611    
612      /**
613       * Get a Node from an identity index.
614       *
615       * NEEDSDOC @param nodeIdentity
616       *
617       * NEEDSDOC ($objectName$) @return
618       */
619      protected Node lookupNode(int nodeIdentity)
620      {
621        return (Node) m_nodes.elementAt(nodeIdentity);
622      }
623    
624      /**
625       * Get the next node identity value in the list, and call the iterator
626       * if it hasn't been added yet.
627       *
628       * @param identity The node identity (index).
629       * @return identity+1, or DTM.NULL.
630       */
631      protected int getNextNodeIdentity(int identity)
632      {
633    
634        identity += 1;
635    
636        if (identity >= m_nodes.size())
637        {
638          if (!nextNode())
639            identity = DTM.NULL;
640        }
641    
642        return identity;
643      }
644    
645      /**
646       * Get the handle from a Node.
647       * <p>%OPT% This will be pretty slow.</p>
648       *
649       * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
650       * walk down DTM reconstructing path) might be considerably faster
651       * on later nodes in large documents. That might also imply improving
652       * this call to handle nodes which would be in this DTM but
653       * have not yet been built, which might or might not be a Good Thing.</p>
654       * 
655       * %REVIEW% This relies on being able to test node-identity via
656       * object-identity. DTM2DOM proxying is a great example of a case where
657       * that doesn't work. DOM Level 3 will provide the isSameNode() method
658       * to fix that, but until then this is going to be flaky.
659       *
660       * @param node A node, which may be null.
661       *
662       * @return The node handle or <code>DTM.NULL</code>.
663       */
664      private int getHandleFromNode(Node node)
665      {
666        if (null != node)
667        {
668          int len = m_nodes.size();        
669          boolean isMore;
670          int i = 0;
671          do
672          {          
673            for (; i < len; i++)
674            {
675              if (m_nodes.elementAt(i) == node)
676                return makeNodeHandle(i);
677            }
678    
679            isMore = nextNode();
680      
681            len = m_nodes.size();
682                
683          } 
684          while(isMore || i < len);
685        }
686        
687        return DTM.NULL;
688      }
689    
690      /** Get the handle from a Node. This is a more robust version of
691       * getHandleFromNode, intended to be usable by the public.
692       *
693       * <p>%OPT% This will be pretty slow.</p>
694       * 
695       * %REVIEW% This relies on being able to test node-identity via
696       * object-identity. DTM2DOM proxying is a great example of a case where
697       * that doesn't work. DOM Level 3 will provide the isSameNode() method
698       * to fix that, but until then this is going to be flaky.
699       *
700       * @param node A node, which may be null.
701       *
702       * @return The node handle or <code>DTM.NULL</code>.  */
703      public int getHandleOfNode(Node node)
704      {
705        if (null != node)
706        {
707          // Is Node actually within the same document? If not, don't search!
708          // This would be easier if m_root was always the Document node, but
709          // we decided to allow wrapping a DTM around a subtree.
710          if((m_root==node) ||
711             (m_root.getNodeType()==DOCUMENT_NODE &&
712              m_root==node.getOwnerDocument()) ||
713             (m_root.getNodeType()!=DOCUMENT_NODE &&
714              m_root.getOwnerDocument()==node.getOwnerDocument())
715             )
716            {
717              // If node _is_ in m_root's tree, find its handle
718              //
719              // %OPT% This check may be improved significantly when DOM
720              // Level 3 nodeKey and relative-order tests become
721              // available!
722              for(Node cursor=node;
723                  cursor!=null;
724                  cursor=
725                    (cursor.getNodeType()!=ATTRIBUTE_NODE)
726                    ? cursor.getParentNode()
727                    : ((org.w3c.dom.Attr)cursor).getOwnerElement())
728                {
729                  if(cursor==m_root)
730                    // We know this node; find its handle.
731                    return getHandleFromNode(node); 
732                } // for ancestors of node
733            } // if node and m_root in same Document
734        } // if node!=null
735    
736        return DTM.NULL;
737      }
738    
739      /**
740       * Retrieves an attribute node by qualified name and namespace URI.
741       *
742       * @param nodeHandle int Handle of the node upon which to look up this attribute.
743       * @param namespaceURI The namespace URI of the attribute to
744       *   retrieve, or null.
745       * @param name The local name of the attribute to
746       *   retrieve.
747       * @return The attribute node handle with the specified name (
748       *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
749       *   attribute.
750       */
751      public int getAttributeNode(int nodeHandle, String namespaceURI,
752                                  String name)
753      {
754    
755        // %OPT% This is probably slower than it needs to be.
756        if (null == namespaceURI)
757          namespaceURI = "";
758    
759        int type = getNodeType(nodeHandle);
760    
761        if (DTM.ELEMENT_NODE == type)
762        {
763    
764          // Assume that attributes immediately follow the element.
765          int identity = makeNodeIdentity(nodeHandle);
766    
767          while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
768          {
769            // Assume this can not be null.
770            type = _type(identity);
771    
772                                    // %REVIEW%
773                                    // Should namespace nodes be retrievable DOM-style as attrs?
774                                    // If not we need a separate function... which may be desirable
775                                    // architecturally, but which is ugly from a code point of view.
776                                    // (If we REALLY insist on it, this code should become a subroutine
777                                    // of both -- retrieve the node, then test if the type matches
778                                    // what you're looking for.)
779            if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
780            {
781              Node node = lookupNode(identity);
782              String nodeuri = node.getNamespaceURI();
783    
784              if (null == nodeuri)
785                nodeuri = "";
786    
787              String nodelocalname = node.getLocalName();
788    
789              if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
790                return makeNodeHandle(identity);
791            }
792                                    
793            else // if (DTM.NAMESPACE_NODE != type)
794            {
795              break;
796            }
797          }
798        }
799    
800        return DTM.NULL;
801      }
802    
803      /**
804       * Get the string-value of a node as a String object
805       * (see http://www.w3.org/TR/xpath#data-model
806       * for the definition of a node's string-value).
807       *
808       * @param nodeHandle The node ID.
809       *
810       * @return A string object that represents the string-value of the given node.
811       */
812      public XMLString getStringValue(int nodeHandle)
813      {
814    
815        int type = getNodeType(nodeHandle);
816        Node node = getNode(nodeHandle);
817        // %TBD% If an element only has one text node, we should just use it 
818        // directly.
819        if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type 
820        || DTM.DOCUMENT_FRAGMENT_NODE == type)
821        {
822          FastStringBuffer buf = StringBufferPool.get();
823          String s;
824      
825          try
826          {
827            getNodeData(node, buf);
828      
829            s = (buf.length() > 0) ? buf.toString() : "";
830          }
831          finally
832          {
833            StringBufferPool.free(buf);
834          }
835      
836          return m_xstrf.newstr( s );
837        }
838        else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
839        {
840          // If this is a DTM text node, it may be made of multiple DOM text
841          // nodes -- including navigating into Entity References. DOM2DTM
842          // records the first node in the sequence and requires that we
843          // pick up the others when we retrieve the DTM node's value.
844          //
845          // %REVIEW% DOM Level 3 is expected to add a "whole text"
846          // retrieval method which performs this function for us.
847          FastStringBuffer buf = StringBufferPool.get();
848          while(node!=null)
849          {
850            buf.append(node.getNodeValue());
851            node=logicalNextDOMTextNode(node);
852          }
853          String s=(buf.length() > 0) ? buf.toString() : "";
854          StringBufferPool.free(buf);
855          return m_xstrf.newstr( s );
856        }
857        else
858          return m_xstrf.newstr( node.getNodeValue() );
859      }
860      
861      /**
862       * Determine if the string-value of a node is whitespace
863       *
864       * @param nodeHandle The node Handle.
865       *
866       * @return Return true if the given node is whitespace.
867       */
868      public boolean isWhitespace(int nodeHandle)
869      {
870            int type = getNodeType(nodeHandle);
871        Node node = getNode(nodeHandle);
872            if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
873        {
874          // If this is a DTM text node, it may be made of multiple DOM text
875          // nodes -- including navigating into Entity References. DOM2DTM
876          // records the first node in the sequence and requires that we
877          // pick up the others when we retrieve the DTM node's value.
878          //
879          // %REVIEW% DOM Level 3 is expected to add a "whole text"
880          // retrieval method which performs this function for us.
881          FastStringBuffer buf = StringBufferPool.get();
882          while(node!=null)
883          {
884            buf.append(node.getNodeValue());
885            node=logicalNextDOMTextNode(node);
886          }
887         boolean b = buf.isWhitespace(0, buf.length());
888          StringBufferPool.free(buf);
889         return b;
890        }
891        return false;
892      }
893      
894      /**
895       * Retrieve the text content of a DOM subtree, appending it into a
896       * user-supplied FastStringBuffer object. Note that attributes are
897       * not considered part of the content of an element.
898       * <p>
899       * There are open questions regarding whitespace stripping. 
900       * Currently we make no special effort in that regard, since the standard
901       * DOM doesn't yet provide DTD-based information to distinguish
902       * whitespace-in-element-context from genuine #PCDATA. Note that we
903       * should probably also consider xml:space if/when we address this.
904       * DOM Level 3 may solve the problem for us.
905       * <p>
906       * %REVIEW% Actually, since this method operates on the DOM side of the
907       * fence rather than the DTM side, it SHOULDN'T do
908       * any special handling. The DOM does what the DOM does; if you want
909       * DTM-level abstractions, use DTM-level methods.
910       *
911       * @param node Node whose subtree is to be walked, gathering the
912       * contents of all Text or CDATASection nodes.
913       * @param buf FastStringBuffer into which the contents of the text
914       * nodes are to be concatenated.
915       */
916      protected static void getNodeData(Node node, FastStringBuffer buf)
917      {
918    
919        switch (node.getNodeType())
920        {
921        case Node.DOCUMENT_FRAGMENT_NODE :
922        case Node.DOCUMENT_NODE :
923        case Node.ELEMENT_NODE :
924        {
925          for (Node child = node.getFirstChild(); null != child;
926                  child = child.getNextSibling())
927          {
928            getNodeData(child, buf);
929          }
930        }
931        break;
932        case Node.TEXT_NODE :
933        case Node.CDATA_SECTION_NODE :
934        case Node.ATTRIBUTE_NODE :  // Never a child but might be our starting node
935          buf.append(node.getNodeValue());
936          break;
937        case Node.PROCESSING_INSTRUCTION_NODE :
938          // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);        
939          break;
940        default :
941          // ignore
942          break;
943        }
944      }
945    
946      /**
947       * Given a node handle, return its DOM-style node name. This will
948       * include names such as #text or #document.
949       *
950       * @param nodeHandle the id of the node.
951       * @return String Name of this node, which may be an empty string.
952       * %REVIEW% Document when empty string is possible...
953       * %REVIEW-COMMENT% It should never be empty, should it?
954       */
955      public String getNodeName(int nodeHandle)
956      {
957    
958        Node node = getNode(nodeHandle);
959    
960        // Assume non-null.
961        return node.getNodeName();
962      }
963    
964      /**
965       * Given a node handle, return the XPath node name.  This should be
966       * the name as described by the XPath data model, NOT the DOM-style
967       * name.
968       *
969       * @param nodeHandle the id of the node.
970       * @return String Name of this node, which may be an empty string.
971       */
972      public String getNodeNameX(int nodeHandle)
973      {
974    
975        String name;
976        short type = getNodeType(nodeHandle);
977    
978        switch (type)
979        {
980        case DTM.NAMESPACE_NODE :
981        {
982          Node node = getNode(nodeHandle);
983    
984          // assume not null.
985          name = node.getNodeName();
986          if(name.startsWith("xmlns:"))
987          {
988            name = QName.getLocalPart(name);
989          }
990          else if(name.equals("xmlns"))
991          {
992            name = "";
993          }
994        }
995        break;
996        case DTM.ATTRIBUTE_NODE :
997        case DTM.ELEMENT_NODE :
998        case DTM.ENTITY_REFERENCE_NODE :
999        case DTM.PROCESSING_INSTRUCTION_NODE :
1000        {
1001          Node node = getNode(nodeHandle);
1002    
1003          // assume not null.
1004          name = node.getNodeName();
1005        }
1006        break;
1007        default :
1008          name = "";
1009        }
1010    
1011        return name;
1012      }
1013    
1014      /**
1015       * Given a node handle, return its XPath-style localname.
1016       * (As defined in Namespaces, this is the portion of the name after any
1017       * colon character).
1018       *
1019       * @param nodeHandle the id of the node.
1020       * @return String Local name of this node.
1021       */
1022      public String getLocalName(int nodeHandle)
1023      {
1024        if(JJK_NEWCODE)
1025        {
1026          int id=makeNodeIdentity(nodeHandle);
1027          if(NULL==id) return null;
1028          Node newnode=(Node)m_nodes.elementAt(id);
1029          String newname=newnode.getLocalName();
1030          if (null == newname)
1031          {
1032            // XSLT treats PIs, and possibly other things, as having QNames.
1033            String qname = newnode.getNodeName();
1034            if('#'==qname.charAt(0))
1035            {
1036              //  Match old default for this function
1037              // This conversion may or may not be necessary
1038              newname="";
1039            }
1040            else
1041            {
1042              int index = qname.indexOf(':');
1043              newname = (index < 0) ? qname : qname.substring(index + 1);
1044            }
1045          }
1046          return newname;
1047        }
1048        else
1049        {
1050          String name;
1051          short type = getNodeType(nodeHandle);
1052          switch (type)
1053          {
1054          case DTM.ATTRIBUTE_NODE :
1055          case DTM.ELEMENT_NODE :
1056          case DTM.ENTITY_REFERENCE_NODE :
1057          case DTM.NAMESPACE_NODE :
1058          case DTM.PROCESSING_INSTRUCTION_NODE :
1059            {
1060              Node node = getNode(nodeHandle);
1061              
1062              // assume not null.
1063              name = node.getLocalName();
1064              
1065              if (null == name)
1066              {
1067                String qname = node.getNodeName();
1068                int index = qname.indexOf(':');
1069                
1070                name = (index < 0) ? qname : qname.substring(index + 1);
1071              }
1072            }
1073            break;
1074          default :
1075            name = "";
1076          }
1077          return name;
1078        }
1079      }
1080    
1081      /**
1082       * Given a namespace handle, return the prefix that the namespace decl is
1083       * mapping.
1084       * Given a node handle, return the prefix used to map to the namespace.
1085       *
1086       * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
1087       * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
1088       *
1089       * @param nodeHandle the id of the node.
1090       * @return String prefix of this node's name, or "" if no explicit
1091       * namespace prefix was given.
1092       */
1093      public String getPrefix(int nodeHandle)
1094      {
1095    
1096        String prefix;
1097        short type = getNodeType(nodeHandle);
1098    
1099        switch (type)
1100        {
1101        case DTM.NAMESPACE_NODE :
1102        {
1103          Node node = getNode(nodeHandle);
1104    
1105          // assume not null.
1106          String qname = node.getNodeName();
1107          int index = qname.indexOf(':');
1108    
1109          prefix = (index < 0) ? "" : qname.substring(index + 1);
1110        }
1111        break;
1112        case DTM.ATTRIBUTE_NODE :
1113        case DTM.ELEMENT_NODE :
1114        {
1115          Node node = getNode(nodeHandle);
1116    
1117          // assume not null.
1118          String qname = node.getNodeName();
1119          int index = qname.indexOf(':');
1120    
1121          prefix = (index < 0) ? "" : qname.substring(0, index);
1122        }
1123        break;
1124        default :
1125          prefix = "";
1126        }
1127    
1128        return prefix;
1129      }
1130    
1131      /**
1132       * Given a node handle, return its DOM-style namespace URI
1133       * (As defined in Namespaces, this is the declared URI which this node's
1134       * prefix -- or default in lieu thereof -- was mapped to.)
1135       *
1136       * <p>%REVIEW% Null or ""? -sb</p>
1137       *
1138       * @param nodeHandle the id of the node.
1139       * @return String URI value of this node's namespace, or null if no
1140       * namespace was resolved.
1141       */
1142      public String getNamespaceURI(int nodeHandle)
1143      {
1144        if(JJK_NEWCODE)
1145        {
1146          int id=makeNodeIdentity(nodeHandle);
1147          if(id==NULL) return null;
1148          Node node=(Node)m_nodes.elementAt(id);
1149          return node.getNamespaceURI();
1150        }
1151        else
1152        {
1153          String nsuri;
1154          short type = getNodeType(nodeHandle);
1155          
1156          switch (type)
1157          {
1158          case DTM.ATTRIBUTE_NODE :
1159          case DTM.ELEMENT_NODE :
1160          case DTM.ENTITY_REFERENCE_NODE :
1161          case DTM.NAMESPACE_NODE :
1162          case DTM.PROCESSING_INSTRUCTION_NODE :
1163            {
1164              Node node = getNode(nodeHandle);
1165              
1166              // assume not null.
1167              nsuri = node.getNamespaceURI();
1168              
1169              // %TBD% Handle DOM1?
1170            }
1171            break;
1172          default :
1173            nsuri = null;
1174          }
1175    
1176          return nsuri;
1177        }
1178        
1179      }
1180      
1181      /** Utility function: Given a DOM Text node, determine whether it is
1182       * logically followed by another Text or CDATASection node. This may
1183       * involve traversing into Entity References.
1184       * 
1185       * %REVIEW% DOM Level 3 is expected to add functionality which may 
1186       * allow us to retire this.
1187       */
1188      private Node logicalNextDOMTextNode(Node n)
1189      {
1190            Node p=n.getNextSibling();
1191            if(p==null)
1192            {
1193                    // Walk out of any EntityReferenceNodes that ended with text
1194                    for(n=n.getParentNode();
1195                            n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1196                            n=n.getParentNode())
1197                    {
1198                            p=n.getNextSibling();
1199                            if(p!=null)
1200                                    break;
1201                    }
1202            }
1203            n=p;
1204            while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1205            {
1206                    // Walk into any EntityReferenceNodes that start with text
1207                    if(n.hasChildNodes())
1208                            n=n.getFirstChild();
1209                    else
1210                            n=n.getNextSibling();
1211            }
1212            if(n!=null)
1213            {
1214                    // Found a logical next sibling. Is it text?
1215                    int ntype=n.getNodeType();
1216                    if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1217                            n=null;
1218            }
1219            return n;
1220      }
1221    
1222      /**
1223       * Given a node handle, return its node value. This is mostly
1224       * as defined by the DOM, but may ignore some conveniences.
1225       * <p>
1226       *
1227       * @param nodeHandle The node id.
1228       * @return String Value of this node, or null if not
1229       * meaningful for this node type.
1230       */
1231      public String getNodeValue(int nodeHandle)
1232      {
1233        // The _type(nodeHandle) call was taking the lion's share of our
1234        // time, and was wrong anyway since it wasn't coverting handle to
1235        // identity. Inlined it.
1236        int type = _exptype(makeNodeIdentity(nodeHandle));
1237        type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1238        
1239        if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1240          return getNode(nodeHandle).getNodeValue();
1241        
1242        // If this is a DTM text node, it may be made of multiple DOM text
1243        // nodes -- including navigating into Entity References. DOM2DTM
1244        // records the first node in the sequence and requires that we
1245        // pick up the others when we retrieve the DTM node's value.
1246        //
1247        // %REVIEW% DOM Level 3 is expected to add a "whole text"
1248        // retrieval method which performs this function for us.
1249        Node node = getNode(nodeHandle);
1250        Node n=logicalNextDOMTextNode(node);
1251        if(n==null)
1252          return node.getNodeValue();
1253        
1254        FastStringBuffer buf = StringBufferPool.get();
1255            buf.append(node.getNodeValue());
1256        while(n!=null)
1257        {
1258          buf.append(n.getNodeValue());
1259          n=logicalNextDOMTextNode(n);
1260        }
1261        String s = (buf.length() > 0) ? buf.toString() : "";
1262        StringBufferPool.free(buf);
1263        return s;
1264      }
1265    
1266      /**
1267       *   A document type declaration information item has the following properties:
1268       *
1269       *     1. [system identifier] The system identifier of the external subset, if
1270       *        it exists. Otherwise this property has no value.
1271       *
1272       * @return the system identifier String object, or null if there is none.
1273       */
1274      public String getDocumentTypeDeclarationSystemIdentifier()
1275      {
1276    
1277        Document doc;
1278    
1279        if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1280          doc = (Document) m_root;
1281        else
1282          doc = m_root.getOwnerDocument();
1283    
1284        if (null != doc)
1285        {
1286          DocumentType dtd = doc.getDoctype();
1287    
1288          if (null != dtd)
1289          {
1290            return dtd.getSystemId();
1291          }
1292        }
1293    
1294        return null;
1295      }
1296    
1297      /**
1298       * Return the public identifier of the external subset,
1299       * normalized as described in 4.2.2 External Entities [XML]. If there is
1300       * no external subset or if it has no public identifier, this property
1301       * has no value.
1302       *
1303       * @return the public identifier String object, or null if there is none.
1304       */
1305      public String getDocumentTypeDeclarationPublicIdentifier()
1306      {
1307    
1308        Document doc;
1309    
1310        if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1311          doc = (Document) m_root;
1312        else
1313          doc = m_root.getOwnerDocument();
1314    
1315        if (null != doc)
1316        {
1317          DocumentType dtd = doc.getDoctype();
1318    
1319          if (null != dtd)
1320          {
1321            return dtd.getPublicId();
1322          }
1323        }
1324    
1325        return null;
1326      }
1327    
1328      /**
1329       * Returns the <code>Element</code> whose <code>ID</code> is given by
1330       * <code>elementId</code>. If no such element exists, returns
1331       * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1332       * has this <code>ID</code>. Attributes (including those
1333       * with the name "ID") are not of type ID unless so defined by DTD/Schema
1334       * information available to the DTM implementation.
1335       * Implementations that do not know whether attributes are of type ID or
1336       * not are expected to return <code>DTM.NULL</code>.
1337       *
1338       * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1339       * and this operation searches only within a single document, right?
1340       * Wouldn't want collisions between DTMs in the same process.</p>
1341       *
1342       * @param elementId The unique <code>id</code> value for an element.
1343       * @return The handle of the matching element.
1344       */
1345      public int getElementById(String elementId)
1346      {
1347    
1348        Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 
1349            ? (Document) m_root : m_root.getOwnerDocument();
1350            
1351        if(null != doc)
1352        {
1353          Node elem = doc.getElementById(elementId);
1354          if(null != elem)
1355          {
1356            int elemHandle = getHandleFromNode(elem);
1357            
1358            if(DTM.NULL == elemHandle)
1359            {
1360              int identity = m_nodes.size()-1;
1361              while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1362              {
1363                Node node = getNode(identity);
1364                if(node == elem)
1365                {
1366                  elemHandle = getHandleFromNode(elem);
1367                  break;
1368                }
1369               }
1370            }
1371            
1372            return elemHandle;
1373          }
1374        
1375        }
1376        return DTM.NULL;
1377      }
1378    
1379      /**
1380       * The getUnparsedEntityURI function returns the URI of the unparsed
1381       * entity with the specified name in the same document as the context
1382       * node (see [3.3 Unparsed Entities]). It returns the empty string if
1383       * there is no such entity.
1384       * <p>
1385       * XML processors may choose to use the System Identifier (if one
1386       * is provided) to resolve the entity, rather than the URI in the
1387       * Public Identifier. The details are dependent on the processor, and
1388       * we would have to support some form of plug-in resolver to handle
1389       * this properly. Currently, we simply return the System Identifier if
1390       * present, and hope that it is a usable URI or that our caller can
1391       * map it to one.
1392       * TODO: Resolve Public Identifiers... or consider changing function name.
1393       * <p>
1394       * If we find a relative URI
1395       * reference, XML expects it to be resolved in terms of the base URI
1396       * of the document. The DOM doesn't do that for us, and it isn't
1397       * entirely clear whether that should be done here; currently that's
1398       * pushed up to a higher level of our application. (Note that DOM Level
1399       * 1 didn't store the document's base URI.)
1400       * TODO: Consider resolving Relative URIs.
1401       * <p>
1402       * (The DOM's statement that "An XML processor may choose to
1403       * completely expand entities before the structure model is passed
1404       * to the DOM" refers only to parsed entities, not unparsed, and hence
1405       * doesn't affect this function.)
1406       *
1407       * @param name A string containing the Entity Name of the unparsed
1408       * entity.
1409       *
1410       * @return String containing the URI of the Unparsed Entity, or an
1411       * empty string if no such entity exists.
1412       */
1413      public String getUnparsedEntityURI(String name)
1414      {
1415    
1416        String url = "";
1417        Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 
1418            ? (Document) m_root : m_root.getOwnerDocument();
1419    
1420        if (null != doc)
1421        {
1422          DocumentType doctype = doc.getDoctype();
1423      
1424          if (null != doctype)
1425          {
1426            NamedNodeMap entities = doctype.getEntities();
1427            if(null == entities)
1428              return url;
1429            Entity entity = (Entity) entities.getNamedItem(name);
1430            if(null == entity)
1431              return url;
1432            
1433            String notationName = entity.getNotationName();
1434      
1435            if (null != notationName)  // then it's unparsed
1436            {
1437              // The draft says: "The XSLT processor may use the public 
1438              // identifier to generate a URI for the entity instead of the URI 
1439              // specified in the system identifier. If the XSLT processor does 
1440              // not use the public identifier to generate the URI, it must use 
1441              // the system identifier; if the system identifier is a relative 
1442              // URI, it must be resolved into an absolute URI using the URI of 
1443              // the resource containing the entity declaration as the base 
1444              // URI [RFC2396]."
1445              // So I'm falling a bit short here.
1446              url = entity.getSystemId();
1447      
1448              if (null == url)
1449              {
1450                url = entity.getPublicId();
1451              }
1452              else
1453              {
1454                // This should be resolved to an absolute URL, but that's hard 
1455                // to do from here.
1456              }        
1457            }
1458          }
1459        }
1460    
1461        return url;
1462      }
1463    
1464      /**
1465       *     5. [specified] A flag indicating whether this attribute was actually
1466       *        specified in the start-tag of its element, or was defaulted from the
1467       *        DTD.
1468       *
1469       * @param attributeHandle the attribute handle
1470       * @return <code>true</code> if the attribute was specified;
1471       *         <code>false</code> if it was defaulted.
1472       */
1473      public boolean isAttributeSpecified(int attributeHandle)
1474      {
1475        int type = getNodeType(attributeHandle);
1476    
1477        if (DTM.ATTRIBUTE_NODE == type)
1478        {
1479          Attr attr = (Attr)getNode(attributeHandle);
1480          return attr.getSpecified();
1481        }
1482        return false;
1483      }
1484    
1485      /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1486       * we're wrapped around an existing DOM.
1487       *
1488       * @param source The IncrementalSAXSource that we want to receive events from
1489       * on demand.
1490       */
1491      public void setIncrementalSAXSource(IncrementalSAXSource source)
1492      {
            // Intentionally empty: the argument is ignored. Per the Javadoc
            // above, an incremental SAX source is not relevant for a DTM that
            // is wrapped around an existing DOM.
1493      }
1494      
1495      /** getContentHandler returns "our SAX builder" -- the thing that
1496       * someone else should send SAX events to in order to extend this
1497       * DTM model.
1498       *
1499       * @return null if this model doesn't respond to SAX events,
1500       * "this" if the DTM object has a built-in SAX ContentHandler,
1501       * the IncrementalSAXSource if we're bound to one and should receive
1502       * the SAX stream via it for incremental build purposes...
1503       * */
1504      public org.xml.sax.ContentHandler getContentHandler()
1505      {
              // Unconditionally null. Per the @return contract above, null
              // means this model does not respond to SAX events.
1506          return null;
1507      }
1508      
1509      /**
1510       * Return this DTM's lexical handler.
1511       *
1512       * %REVIEW% Should this return null if construction already done/begun?
1513       *
1514       * @return null if this model doesn't respond to lexical SAX events,
1515       * "this" if the DTM object has a built-in SAX ContentHandler,
1516       * the IncrementalSAXSource if we're bound to one and should receive
1517       * the SAX stream via it for incremental build purposes...
1518       */
1519      public org.xml.sax.ext.LexicalHandler getLexicalHandler()
1520      {
1521    
            // Unconditionally null. Per the @return contract above, null
            // means this model does not respond to lexical SAX events.
1522        return null;
1523      }
1524    
1525      
1526      /**
1527       * Return this DTM's EntityResolver.
1528       *
1529       * @return null if this model doesn't respond to SAX entity ref events.
1530       */
1531      public org.xml.sax.EntityResolver getEntityResolver()
1532      {
1533    
            // Unconditionally null. Per the @return contract above, null
            // means this model does not respond to SAX entity ref events.
1534        return null;
1535      }
1536      
1537      /**
1538       * Return this DTM's DTDHandler.
1539       *
1540       * @return null if this model doesn't respond to SAX dtd events.
1541       */
1542      public org.xml.sax.DTDHandler getDTDHandler()
1543      {
1544    
            // Unconditionally null. Per the @return contract above, null
            // means this model does not respond to SAX dtd events.
1545        return null;
1546      }
1547    
1548      /**
1549       * Return this DTM's ErrorHandler.
1550       *
1551       * @return null if this model doesn't respond to SAX error events.
1552       */
1553      public org.xml.sax.ErrorHandler getErrorHandler()
1554      {
1555    
            // Unconditionally null. Per the @return contract above, null
            // means this model does not respond to SAX error events.
1556        return null;
1557      }
1558      
1559      /**
1560       * Return this DTM's DeclHandler.
1561       *
1562       * @return null if this model doesn't respond to SAX Decl events.
1563       */
1564      public org.xml.sax.ext.DeclHandler getDeclHandler()
1565      {
1566    
1567        return null;
1568      }  
1569    
1570      /** @return true iff we're building this model incrementally (eg
1571       * we're partnered with a IncrementalSAXSource) and thus require that the
1572       * transformation and the parse run simultaneously. Guidance to the
1573       * DTMManager.
1574       * */
1575      public boolean needsTwoThreads()
1576      {
1577        return false;
1578      }
1579    
1580      // ========== Direct SAX Dispatch, for optimization purposes ========
1581      
1582      /**
1583       * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1584       * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1585       * the definition of <CODE>S</CODE></A> for details.
1586       * @param   ch      Character to check as XML whitespace.
1587       * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
1588       */
1589      private static boolean isSpace(char ch)
1590      {
1591        return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
1592      }
1593    
1594      /**
1595       * Directly call the
1596       * characters method on the passed ContentHandler for the
1597       * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1598       * for the definition of a node's string-value). Multiple calls to the
1599       * ContentHandler's characters methods may well occur for a single call to
1600       * this method.
1601       *
1602       * @param nodeHandle The node ID.
1603       * @param ch A non-null reference to a ContentHandler.
1604       *
1605       * @throws org.xml.sax.SAXException
1606       */
1607      public void dispatchCharactersEvents(
1608              int nodeHandle, org.xml.sax.ContentHandler ch, 
1609              boolean normalize)
1610                throws org.xml.sax.SAXException
1611      {
1612        if(normalize)
1613        {
1614          XMLString str = getStringValue(nodeHandle);
1615          str = str.fixWhiteSpace(true, true, false);
1616          str.dispatchCharactersEvents(ch);
1617        }
1618        else
1619        {
1620          int type = getNodeType(nodeHandle);
1621          Node node = getNode(nodeHandle);
1622          dispatchNodeData(node, ch, 0);
1623              // Text coalition -- a DTM text node may represent multiple
1624              // DOM nodes.
1625              if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1626              {
1627                      while( null != (node=logicalNextDOMTextNode(node)) )
1628                      {
1629                          dispatchNodeData(node, ch, 0);
1630                      }
1631              }
1632        }
1633      }
1634      
1635      /**
1636       * Retrieve the text content of a DOM subtree, appending it into a
1637       * user-supplied FastStringBuffer object. Note that attributes are
1638       * not considered part of the content of an element.
1639       * <p>
1640       * There are open questions regarding whitespace stripping. 
1641       * Currently we make no special effort in that regard, since the standard
1642       * DOM doesn't yet provide DTD-based information to distinguish
1643       * whitespace-in-element-context from genuine #PCDATA. Note that we
1644       * should probably also consider xml:space if/when we address this.
1645       * DOM Level 3 may solve the problem for us.
1646       * <p>
1647       * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1648       * routine _shouldn't_ perform any processing beyond what the DOM already
1649       * does, and that whitespace stripping and so on belong at the DTM level.
1650       * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1651       *
1652       * @param node Node whose subtree is to be walked, gathering the
1653       * contents of all Text or CDATASection nodes.
1654       */
1655      protected static void dispatchNodeData(Node node, 
1656                                             org.xml.sax.ContentHandler ch, 
1657                                             int depth)
1658                throws org.xml.sax.SAXException
1659      {
1660    
1661        switch (node.getNodeType())
1662        {
1663        case Node.DOCUMENT_FRAGMENT_NODE :
1664        case Node.DOCUMENT_NODE :
1665        case Node.ELEMENT_NODE :
1666        {
1667          for (Node child = node.getFirstChild(); null != child;
1668                  child = child.getNextSibling())
1669          {
1670            dispatchNodeData(child, ch, depth+1);
1671          }
1672        }
1673        break;
1674        case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1675        case Node.COMMENT_NODE :
1676          if(0 != depth)
1677            break;
1678            // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1679            // to perform Text Coalition. That should only be done in DTM space. 
1680        case Node.TEXT_NODE :
1681        case Node.CDATA_SECTION_NODE :
1682        case Node.ATTRIBUTE_NODE :
1683          String str = node.getNodeValue();
1684          if(ch instanceof CharacterNodeHandler)
1685          {
1686            ((CharacterNodeHandler)ch).characters(node);
1687          }
1688          else
1689          {
1690            ch.characters(str.toCharArray(), 0, str.length());
1691          }
1692          break;
1693    //    /* case Node.PROCESSING_INSTRUCTION_NODE :
1694    //      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);        
1695    //      break; */
1696        default :
1697          // ignore
1698          break;
1699        }
1700      }
1701      
1702      TreeWalker m_walker = new TreeWalker(null);
1703      
1704      /**
1705       * Directly create SAX parser events from a subtree.
1706       *
1707       * @param nodeHandle The node ID.
1708       * @param ch A non-null reference to a ContentHandler.
1709       *
1710       * @throws org.xml.sax.SAXException
1711       */
1712      public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1713              throws org.xml.sax.SAXException
1714      {
1715        TreeWalker treeWalker = m_walker;
1716        ContentHandler prevCH = treeWalker.getContentHandler();
1717        
1718        if(null != prevCH)
1719        {
1720          treeWalker = new TreeWalker(null);
1721        }
1722        treeWalker.setContentHandler(ch);
1723        
1724        try
1725        {
1726          Node node = getNode(nodeHandle);
1727          treeWalker.traverseFragment(node);
1728        }
1729        finally
1730        {
1731          treeWalker.setContentHandler(null);
1732        }
1733      }
1734      
/**
 * Callback that receives character data as a DOM node. dispatchNodeData
 * checks whether the ContentHandler it was given also implements this
 * interface and, if so, passes it the node rather than a character array.
 */
public interface CharacterNodeHandler
{
  /**
   * Receive the character data carried by a DOM node.
   *
   * @param node The DOM node being dispatched.
   *
   * @throws org.xml.sax.SAXException
   */
  void characters(Node node) throws org.xml.sax.SAXException;
}
1740    
1741      /**
1742       * For the moment all the run time properties are ignored by this
1743       * class.
1744       *
1745       * @param property a <code>String</code> value
1746       * @param value an <code>Object</code> value
1747       */
1748      public void setProperty(String property, Object value)
1749      {
1750      }
1751      
1752      /**
1753       * No source information is available for DOM2DTM, so return
1754       * <code>null</code> here.
1755       *
1756       * @param node an <code>int</code> value
1757       * @return null
1758       */
1759      public SourceLocator getSourceLocatorFor(int node)
1760      {
1761        return null;
1762      }
1763    
1764    }
1765    
1766