001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: DOM2DTM.java 478671 2006-11-23 21:00:31Z minchau $
020 */
021 package org.apache.xml.dtm.ref.dom2dtm;
022
023 import java.util.Vector;
024
025 import javax.xml.transform.SourceLocator;
026 import javax.xml.transform.dom.DOMSource;
027
028 import org.apache.xml.dtm.DTM;
029 import org.apache.xml.dtm.DTMManager;
030 import org.apache.xml.dtm.DTMWSFilter;
031 import org.apache.xml.dtm.ref.DTMDefaultBaseIterators;
032 import org.apache.xml.dtm.ref.DTMManagerDefault;
033 import org.apache.xml.dtm.ref.ExpandedNameTable;
034 import org.apache.xml.dtm.ref.IncrementalSAXSource;
035 import org.apache.xml.res.XMLErrorResources;
036 import org.apache.xml.res.XMLMessages;
037 import org.apache.xml.utils.FastStringBuffer;
038 import org.apache.xml.utils.QName;
039 import org.apache.xml.utils.StringBufferPool;
040 import org.apache.xml.utils.TreeWalker;
041 import org.apache.xml.utils.XMLCharacterRecognizer;
042 import org.apache.xml.utils.XMLString;
043 import org.apache.xml.utils.XMLStringFactory;
044 import org.w3c.dom.Attr;
045 import org.w3c.dom.Document;
046 import org.w3c.dom.DocumentType;
047 import org.w3c.dom.Element;
048 import org.w3c.dom.Entity;
049 import org.w3c.dom.NamedNodeMap;
050 import org.w3c.dom.Node;
051 import org.xml.sax.ContentHandler;
052
053 /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
054 * DTM API.
055 *
056 * Note that it doesn't necessarily represent a full Document
057 * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
061 *
062 * Note too that we do not currently attempt to track document
063 * mutation. If you alter the DOM after wrapping DOM2DTM around it,
064 * all bets are off.
065 * */
066 public class DOM2DTM extends DTMDefaultBaseIterators
067 {
068 static final boolean JJK_DEBUG=false;
069 static final boolean JJK_NEWCODE=true;
070
071 /** Manefest constant
072 */
073 static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
074
075 /** The current position in the DOM tree. Last node examined for
076 * possible copying to DTM. */
077 transient private Node m_pos;
078 /** The current position in the DTM tree. Who children get appended to. */
079 private int m_last_parent=0;
080 /** The current position in the DTM tree. Who children reference as their
081 * previous sib. */
082 private int m_last_kid=NULL;
083
084 /** The top of the subtree.
085 * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
086 * */
087 transient private Node m_root;
088
089 /** True iff the first element has been processed. This is used to control
090 synthesis of the implied xml: namespace declaration node. */
091 boolean m_processedFirstElement=false;
092
093 /** true if ALL the nodes in the m_root subtree have been processed;
094 * false if our incremental build has not yet finished scanning the
095 * DOM tree. */
096 transient private boolean m_nodesAreProcessed;
097
098 /** The node objects. The instance part of the handle indexes
099 * directly into this vector. Each DTM node may actually be
100 * composed of several DOM nodes (for example, if logically-adjacent
101 * Text/CDATASection nodes in the DOM have been coalesced into a
102 * single DTM Text node); this table points only to the first in
103 * that sequence. */
104 protected Vector m_nodes = new Vector();
105
106 /**
107 * Construct a DOM2DTM object from a DOM node.
108 *
109 * @param mgr The DTMManager who owns this DTM.
110 * @param domSource the DOM source that this DTM will wrap.
111 * @param dtmIdentity The DTM identity ID for this DTM.
112 * @param whiteSpaceFilter The white space filter for this DTM, which may
113 * be null.
114 * @param xstringfactory XMLString factory for creating character content.
115 * @param doIndexing true if the caller considers it worth it to use
116 * indexing schemes.
117 */
118 public DOM2DTM(DTMManager mgr, DOMSource domSource,
119 int dtmIdentity, DTMWSFilter whiteSpaceFilter,
120 XMLStringFactory xstringfactory,
121 boolean doIndexing)
122 {
123 super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
124 xstringfactory, doIndexing);
125
126 // Initialize DOM navigation
127 m_pos=m_root = domSource.getNode();
128 // Initialize DTM navigation
129 m_last_parent=m_last_kid=NULL;
130 m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
131
132 // Apparently the domSource root may not actually be the
133 // Document node. If it's an Element node, we need to immediately
134 // add its attributes. Adapted from nextNode().
135 // %REVIEW% Move this logic into addNode and recurse? Cleaner!
136 //
137 // (If it's an EntityReference node, we're probably in
138 // seriously bad trouble. For now
139 // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
140 //
141 // %ISSUE% What about inherited namespaces in this case?
142 // Do we need to special-case initialize them into the DTM model?
143 if(ELEMENT_NODE == m_root.getNodeType())
144 {
145 NamedNodeMap attrs=m_root.getAttributes();
146 int attrsize=(attrs==null) ? 0 : attrs.getLength();
147 if(attrsize>0)
148 {
149 int attrIndex=NULL; // start with no previous sib
150 for(int i=0;i<attrsize;++i)
151 {
152 // No need to force nodetype in this case;
153 // addNode() will take care of switching it from
154 // Attr to Namespace if necessary.
155 attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
156 m_firstch.setElementAt(DTM.NULL,attrIndex);
157 }
158 // Terminate list of attrs, and make sure they aren't
159 // considered children of the element
160 m_nextsib.setElementAt(DTM.NULL,attrIndex);
161
162 // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
163 } // if attrs exist
164 } //if(ELEMENT_NODE)
165
166 // Initialize DTM-completed status
167 m_nodesAreProcessed = false;
168 }
169
170 /**
171 * Construct the node map from the node.
172 *
173 * @param node The node that is to be added to the DTM.
174 * @param parentIndex The current parent index.
175 * @param previousSibling The previous sibling index.
176 * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
177 * Used to force nodes to Text rather than CDATASection when their
178 * coalesced value includes ordinary Text nodes (current DTM behavior).
179 *
180 * @return The index identity of the node that was added.
181 */
182 protected int addNode(Node node, int parentIndex,
183 int previousSibling, int forceNodeType)
184 {
185 int nodeIndex = m_nodes.size();
186
187 // Have we overflowed a DTM Identity's addressing range?
188 if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
189 {
190 try
191 {
192 if(m_mgr==null)
193 throw new ClassCastException();
194
195 // Handle as Extended Addressing
196 DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
197 int id=mgrD.getFirstFreeDTMID();
198 mgrD.addDTM(this,id,nodeIndex);
199 m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
200 }
201 catch(ClassCastException e)
202 {
203 // %REVIEW% Wrong error message, but I've been told we're trying
204 // not to add messages right not for I18N reasons.
205 // %REVIEW% Should this be a Fatal Error?
206 error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
207 }
208 }
209
210 m_size++;
211 // ensureSize(nodeIndex);
212
213 int type;
214 if(NULL==forceNodeType)
215 type = node.getNodeType();
216 else
217 type=forceNodeType;
218
219 // %REVIEW% The Namespace Spec currently says that Namespaces are
220 // processed in a non-namespace-aware manner, by matching the
221 // QName, even though there is in fact a namespace assigned to
222 // these nodes in the DOM. If and when that changes, we will have
223 // to consider whether we check the namespace-for-namespaces
224 // rather than the node name.
225 //
226 // %TBD% Note that the DOM does not necessarily explicitly declare
227 // all the namespaces it uses. DOM Level 3 will introduce a
228 // namespace-normalization operation which reconciles that, and we
229 // can request that users invoke it or otherwise ensure that the
230 // tree is namespace-well-formed before passing the DOM to Xalan.
231 // But if they don't, what should we do about it? We probably
232 // don't want to alter the source DOM (and may not be able to do
233 // so if it's read-only). The best available answer might be to
234 // synthesize additional DTM Namespace Nodes that don't correspond
235 // to DOM Attr Nodes.
236 if (Node.ATTRIBUTE_NODE == type)
237 {
238 String name = node.getNodeName();
239
240 if (name.startsWith("xmlns:") || name.equals("xmlns"))
241 {
242 type = DTM.NAMESPACE_NODE;
243 }
244 }
245
246 m_nodes.addElement(node);
247
248 m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
249 m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
250 m_prevsib.setElementAt(previousSibling,nodeIndex);
251 m_parent.setElementAt(parentIndex,nodeIndex);
252
253 if(DTM.NULL != parentIndex &&
254 type != DTM.ATTRIBUTE_NODE &&
255 type != DTM.NAMESPACE_NODE)
256 {
257 // If the DTM parent had no children, this becomes its first child.
258 if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
259 m_firstch.setElementAt(nodeIndex,parentIndex);
260 }
261
262 String nsURI = node.getNamespaceURI();
263
264 // Deal with the difference between Namespace spec and XSLT
265 // definitions of local name. (The former says PIs don't have
266 // localnames; the latter says they do.)
267 String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ?
268 node.getNodeName() :
269 node.getLocalName();
270
271 // Hack to make DOM1 sort of work...
272 if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
273 && null == localName)
274 localName = node.getNodeName(); // -sb
275
276 ExpandedNameTable exnt = m_expandedNameTable;
277
278 // %TBD% Nodes created with the old non-namespace-aware DOM
279 // calls createElement() and createAttribute() will never have a
280 // localname. That will cause their expandedNameID to be just the
281 // nodeType... which will keep them from being matched
282 // successfully by name. Since the DOM makes no promise that
283 // those will participate in namespace processing, this is
284 // officially accepted as Not Our Fault. But it might be nice to
285 // issue a diagnostic message!
286 if(node.getLocalName()==null &&
287 (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
288 {
289 // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
290 }
291
292 int expandedNameID = (null != localName)
293 ? exnt.getExpandedTypeID(nsURI, localName, type) :
294 exnt.getExpandedTypeID(type);
295
296 m_exptype.setElementAt(expandedNameID,nodeIndex);
297
298 indexNode(expandedNameID, nodeIndex);
299
300 if (DTM.NULL != previousSibling)
301 m_nextsib.setElementAt(nodeIndex,previousSibling);
302
303 // This should be done after m_exptype has been set, and probably should
304 // always be the last thing we do
305 if (type == DTM.NAMESPACE_NODE)
306 declareNamespaceInContext(parentIndex,nodeIndex);
307
308 return nodeIndex;
309 }
310
311 /**
312 * Get the number of nodes that have been added.
313 */
314 public int getNumberOfNodes()
315 {
316 return m_nodes.size();
317 }
318
319 /**
320 * This method iterates to the next node that will be added to the table.
321 * Each call to this method adds a new node to the table, unless the end
322 * is reached, in which case it returns null.
323 *
324 * @return The true if a next node is found or false if
325 * there are no more nodes.
326 */
327 protected boolean nextNode()
328 {
329 // Non-recursive one-fetch-at-a-time depth-first traversal with
330 // attribute/namespace nodes and white-space stripping.
331 // Navigating the DOM is simple, navigating the DTM is simple;
332 // keeping track of both at once is a trifle baroque but at least
333 // we've avoided most of the special cases.
334 if (m_nodesAreProcessed)
335 return false;
336
337 // %REVIEW% Is this local copy Really Useful from a performance
338 // point of view? Or is this a false microoptimization?
339 Node pos=m_pos;
340 Node next=null;
341 int nexttype=NULL;
342
343 // Navigate DOM tree
344 do
345 {
346 // Look down to first child.
347 if (pos.hasChildNodes())
348 {
349 next = pos.getFirstChild();
350
351 // %REVIEW% There's probably a more elegant way to skip
352 // the doctype. (Just let it go and Suppress it?
353 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
354 next=next.getNextSibling();
355
356 // Push DTM context -- except for children of Entity References,
357 // which have no DTM equivalent and cause no DTM navigation.
358 if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
359 {
360 m_last_parent=m_last_kid;
361 m_last_kid=NULL;
362 // Whitespace-handler context stacking
363 if(null != m_wsfilter)
364 {
365 short wsv =
366 m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
367 boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
368 ? getShouldStripWhitespace()
369 : (DTMWSFilter.STRIP == wsv);
370 pushShouldStripWhitespace(shouldStrip);
371 } // if(m_wsfilter)
372 }
373 }
374
375 // If that fails, look up and right (but not past root!)
376 else
377 {
378 if(m_last_kid!=NULL)
379 {
380 // Last node posted at this level had no more children
381 // If it has _no_ children, we need to record that.
382 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
383 m_firstch.setElementAt(NULL,m_last_kid);
384 }
385
386 while(m_last_parent != NULL)
387 {
388 // %REVIEW% There's probably a more elegant way to
389 // skip the doctype. (Just let it go and Suppress it?
390 next = pos.getNextSibling();
391 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
392 next=next.getNextSibling();
393
394 if(next!=null)
395 break; // Found it!
396
397 // No next-sibling found. Pop the DOM.
398 pos=pos.getParentNode();
399 if(pos==null)
400 {
401 // %TBD% Should never arise, but I want to be sure of that...
402 if(JJK_DEBUG)
403 {
404 System.out.println("***** DOM2DTM Pop Control Flow problem");
405 for(;;); // Freeze right here!
406 }
407 }
408
409 // The only parents in the DTM are Elements. However,
410 // the DOM could contain EntityReferences. If we
411 // encounter one, pop it _without_ popping DTM.
412 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
413 {
414 // Nothing needs doing
415 if(JJK_DEBUG)
416 System.out.println("***** DOM2DTM popping EntRef");
417 }
418 else
419 {
420 popShouldStripWhitespace();
421 // Fix and pop DTM
422 if(m_last_kid==NULL)
423 m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
424 else
425 m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
426 m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
427 }
428 }
429 if(m_last_parent==NULL)
430 next=null;
431 }
432
433 if(next!=null)
434 nexttype=next.getNodeType();
435
436 // If it's an entity ref, advance past it.
437 //
438 // %REVIEW% Should we let this out the door and just suppress it?
439 // More work, but simpler code, more likely to be correct, and
440 // it doesn't happen very often. We'd get rid of the loop too.
441 if (ENTITY_REFERENCE_NODE == nexttype)
442 pos=next;
443 }
444 while (ENTITY_REFERENCE_NODE == nexttype);
445
446 // Did we run out of the tree?
447 if(next==null)
448 {
449 m_nextsib.setElementAt(NULL,0);
450 m_nodesAreProcessed = true;
451 m_pos=null;
452
453 if(JJK_DEBUG)
454 {
455 System.out.println("***** DOM2DTM Crosscheck:");
456 for(int i=0;i<m_nodes.size();++i)
457 System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
458 }
459
460 return false;
461 }
462
463 // Text needs some special handling:
464 //
465 // DTM may skip whitespace. This is handled by the suppressNode flag, which
466 // when true will keep the DTM node from being created.
467 //
468 // DTM only directly records the first DOM node of any logically-contiguous
469 // sequence. The lastTextNode value will be set to the last node in the
470 // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
471 // advance next over this whole block. Should be simpler than special-casing
472 // the above loop for "Was the logically-preceeding sibling a text node".
473 //
474 // Finally, a DTM node should be considered a CDATASection only if all the
475 // contiguous text it covers is CDATASections. The first Text should
476 // force DTM to Text.
477
478 boolean suppressNode=false;
479 Node lastTextNode=null;
480
481 nexttype=next.getNodeType();
482
483 // nexttype=pos.getNodeType();
484 if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
485 {
486 // If filtering, initially assume we're going to suppress the node
487 suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
488
489 // Scan logically contiguous text (siblings, plus "flattening"
490 // of entity reference boundaries).
491 Node n=next;
492 while(n!=null)
493 {
494 lastTextNode=n;
495 // Any Text node means DTM considers it all Text
496 if(TEXT_NODE == n.getNodeType())
497 nexttype=TEXT_NODE;
498 // Any non-whitespace in this sequence blocks whitespace
499 // suppression
500 suppressNode &=
501 XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
502
503 n=logicalNextDOMTextNode(n);
504 }
505 }
506
507 // Special handling for PIs: Some DOMs represent the XML
508 // Declaration as a PI. This is officially incorrect, per the DOM
509 // spec, but is considered a "wrong but tolerable" temporary
510 // workaround pending proper handling of these fields in DOM Level
511 // 3. We want to recognize and reject that case.
512 else if(PROCESSING_INSTRUCTION_NODE==nexttype)
513 {
514 suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
515 }
516
517
518 if(!suppressNode)
519 {
520 // Inserting next. NOTE that we force the node type; for
521 // coalesced Text, this records CDATASections adjacent to
522 // ordinary Text as Text.
523 int nextindex=addNode(next,m_last_parent,m_last_kid,
524 nexttype);
525
526 m_last_kid=nextindex;
527
528 if(ELEMENT_NODE == nexttype)
529 {
530 int attrIndex=NULL; // start with no previous sib
531 // Process attributes _now_, rather than waiting.
532 // Simpler control flow, makes NS cache available immediately.
533 NamedNodeMap attrs=next.getAttributes();
534 int attrsize=(attrs==null) ? 0 : attrs.getLength();
535 if(attrsize>0)
536 {
537 for(int i=0;i<attrsize;++i)
538 {
539 // No need to force nodetype in this case;
540 // addNode() will take care of switching it from
541 // Attr to Namespace if necessary.
542 attrIndex=addNode(attrs.item(i),
543 nextindex,attrIndex,NULL);
544 m_firstch.setElementAt(DTM.NULL,attrIndex);
545
546 // If the xml: prefix is explicitly declared
547 // we don't need to synthesize one.
548 //
549 // NOTE that XML Namespaces were not originally
550 // defined as being namespace-aware (grrr), and
551 // while the W3C is planning to fix this it's
552 // safer for now to test the QName and trust the
553 // parsers to prevent anyone from redefining the
554 // reserved xmlns: prefix
555 if(!m_processedFirstElement
556 && "xmlns:xml".equals(attrs.item(i).getNodeName()))
557 m_processedFirstElement=true;
558 }
559 // Terminate list of attrs, and make sure they aren't
560 // considered children of the element
561 } // if attrs exist
562 if(!m_processedFirstElement)
563 {
564 // The DOM might not have an explicit declaration for the
565 // implicit "xml:" prefix, but the XPath data model
566 // requires that this appear as a Namespace Node so we
567 // have to synthesize one. You can think of this as
568 // being a default attribute defined by the XML
569 // Namespaces spec rather than by the DTD.
570 attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
571 (Element)next,"xml",NAMESPACE_DECL_NS,
572 makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
573 ),
574 nextindex,attrIndex,NULL);
575 m_firstch.setElementAt(DTM.NULL,attrIndex);
576 m_processedFirstElement=true;
577 }
578 if(attrIndex!=NULL)
579 m_nextsib.setElementAt(DTM.NULL,attrIndex);
580 } //if(ELEMENT_NODE)
581 } // (if !suppressNode)
582
583 // Text postprocessing: Act on values stored above
584 if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
585 {
586 // %TBD% If nexttype was forced to TEXT, patch the DTM node
587
588 next=lastTextNode; // Advance the DOM cursor over contiguous text
589 }
590
591 // Remember where we left off.
592 m_pos=next;
593 return true;
594 }
595
596
597 /**
598 * Return an DOM node for the given node.
599 *
600 * @param nodeHandle The node ID.
601 *
602 * @return A node representation of the DTM node.
603 */
604 public Node getNode(int nodeHandle)
605 {
606
607 int identity = makeNodeIdentity(nodeHandle);
608
609 return (Node) m_nodes.elementAt(identity);
610 }
611
612 /**
613 * Get a Node from an identity index.
614 *
615 * NEEDSDOC @param nodeIdentity
616 *
617 * NEEDSDOC ($objectName$) @return
618 */
619 protected Node lookupNode(int nodeIdentity)
620 {
621 return (Node) m_nodes.elementAt(nodeIdentity);
622 }
623
624 /**
625 * Get the next node identity value in the list, and call the iterator
626 * if it hasn't been added yet.
627 *
628 * @param identity The node identity (index).
629 * @return identity+1, or DTM.NULL.
630 */
631 protected int getNextNodeIdentity(int identity)
632 {
633
634 identity += 1;
635
636 if (identity >= m_nodes.size())
637 {
638 if (!nextNode())
639 identity = DTM.NULL;
640 }
641
642 return identity;
643 }
644
645 /**
646 * Get the handle from a Node.
647 * <p>%OPT% This will be pretty slow.</p>
648 *
649 * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
650 * walk down DTM reconstructing path) might be considerably faster
651 * on later nodes in large documents. That might also imply improving
652 * this call to handle nodes which would be in this DTM but
653 * have not yet been built, which might or might not be a Good Thing.</p>
654 *
655 * %REVIEW% This relies on being able to test node-identity via
656 * object-identity. DTM2DOM proxying is a great example of a case where
657 * that doesn't work. DOM Level 3 will provide the isSameNode() method
658 * to fix that, but until then this is going to be flaky.
659 *
660 * @param node A node, which may be null.
661 *
662 * @return The node handle or <code>DTM.NULL</code>.
663 */
664 private int getHandleFromNode(Node node)
665 {
666 if (null != node)
667 {
668 int len = m_nodes.size();
669 boolean isMore;
670 int i = 0;
671 do
672 {
673 for (; i < len; i++)
674 {
675 if (m_nodes.elementAt(i) == node)
676 return makeNodeHandle(i);
677 }
678
679 isMore = nextNode();
680
681 len = m_nodes.size();
682
683 }
684 while(isMore || i < len);
685 }
686
687 return DTM.NULL;
688 }
689
690 /** Get the handle from a Node. This is a more robust version of
691 * getHandleFromNode, intended to be usable by the public.
692 *
693 * <p>%OPT% This will be pretty slow.</p>
694 *
695 * %REVIEW% This relies on being able to test node-identity via
696 * object-identity. DTM2DOM proxying is a great example of a case where
697 * that doesn't work. DOM Level 3 will provide the isSameNode() method
698 * to fix that, but until then this is going to be flaky.
699 *
700 * @param node A node, which may be null.
701 *
702 * @return The node handle or <code>DTM.NULL</code>. */
703 public int getHandleOfNode(Node node)
704 {
705 if (null != node)
706 {
707 // Is Node actually within the same document? If not, don't search!
708 // This would be easier if m_root was always the Document node, but
709 // we decided to allow wrapping a DTM around a subtree.
710 if((m_root==node) ||
711 (m_root.getNodeType()==DOCUMENT_NODE &&
712 m_root==node.getOwnerDocument()) ||
713 (m_root.getNodeType()!=DOCUMENT_NODE &&
714 m_root.getOwnerDocument()==node.getOwnerDocument())
715 )
716 {
717 // If node _is_ in m_root's tree, find its handle
718 //
719 // %OPT% This check may be improved significantly when DOM
720 // Level 3 nodeKey and relative-order tests become
721 // available!
722 for(Node cursor=node;
723 cursor!=null;
724 cursor=
725 (cursor.getNodeType()!=ATTRIBUTE_NODE)
726 ? cursor.getParentNode()
727 : ((org.w3c.dom.Attr)cursor).getOwnerElement())
728 {
729 if(cursor==m_root)
730 // We know this node; find its handle.
731 return getHandleFromNode(node);
732 } // for ancestors of node
733 } // if node and m_root in same Document
734 } // if node!=null
735
736 return DTM.NULL;
737 }
738
739 /**
740 * Retrieves an attribute node by by qualified name and namespace URI.
741 *
742 * @param nodeHandle int Handle of the node upon which to look up this attribute..
743 * @param namespaceURI The namespace URI of the attribute to
744 * retrieve, or null.
745 * @param name The local name of the attribute to
746 * retrieve.
747 * @return The attribute node handle with the specified name (
748 * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
749 * attribute.
750 */
751 public int getAttributeNode(int nodeHandle, String namespaceURI,
752 String name)
753 {
754
755 // %OPT% This is probably slower than it needs to be.
756 if (null == namespaceURI)
757 namespaceURI = "";
758
759 int type = getNodeType(nodeHandle);
760
761 if (DTM.ELEMENT_NODE == type)
762 {
763
764 // Assume that attributes immediately follow the element.
765 int identity = makeNodeIdentity(nodeHandle);
766
767 while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
768 {
769 // Assume this can not be null.
770 type = _type(identity);
771
772 // %REVIEW%
773 // Should namespace nodes be retrievable DOM-style as attrs?
774 // If not we need a separate function... which may be desirable
775 // architecturally, but which is ugly from a code point of view.
776 // (If we REALLY insist on it, this code should become a subroutine
777 // of both -- retrieve the node, then test if the type matches
778 // what you're looking for.)
779 if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
780 {
781 Node node = lookupNode(identity);
782 String nodeuri = node.getNamespaceURI();
783
784 if (null == nodeuri)
785 nodeuri = "";
786
787 String nodelocalname = node.getLocalName();
788
789 if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
790 return makeNodeHandle(identity);
791 }
792
793 else // if (DTM.NAMESPACE_NODE != type)
794 {
795 break;
796 }
797 }
798 }
799
800 return DTM.NULL;
801 }
802
803 /**
804 * Get the string-value of a node as a String object
805 * (see http://www.w3.org/TR/xpath#data-model
806 * for the definition of a node's string-value).
807 *
808 * @param nodeHandle The node ID.
809 *
810 * @return A string object that represents the string-value of the given node.
811 */
812 public XMLString getStringValue(int nodeHandle)
813 {
814
815 int type = getNodeType(nodeHandle);
816 Node node = getNode(nodeHandle);
817 // %TBD% If an element only has one text node, we should just use it
818 // directly.
819 if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
820 || DTM.DOCUMENT_FRAGMENT_NODE == type)
821 {
822 FastStringBuffer buf = StringBufferPool.get();
823 String s;
824
825 try
826 {
827 getNodeData(node, buf);
828
829 s = (buf.length() > 0) ? buf.toString() : "";
830 }
831 finally
832 {
833 StringBufferPool.free(buf);
834 }
835
836 return m_xstrf.newstr( s );
837 }
838 else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
839 {
840 // If this is a DTM text node, it may be made of multiple DOM text
841 // nodes -- including navigating into Entity References. DOM2DTM
842 // records the first node in the sequence and requires that we
843 // pick up the others when we retrieve the DTM node's value.
844 //
845 // %REVIEW% DOM Level 3 is expected to add a "whole text"
846 // retrieval method which performs this function for us.
847 FastStringBuffer buf = StringBufferPool.get();
848 while(node!=null)
849 {
850 buf.append(node.getNodeValue());
851 node=logicalNextDOMTextNode(node);
852 }
853 String s=(buf.length() > 0) ? buf.toString() : "";
854 StringBufferPool.free(buf);
855 return m_xstrf.newstr( s );
856 }
857 else
858 return m_xstrf.newstr( node.getNodeValue() );
859 }
860
861 /**
862 * Determine if the string-value of a node is whitespace
863 *
864 * @param nodeHandle The node Handle.
865 *
866 * @return Return true if the given node is whitespace.
867 */
868 public boolean isWhitespace(int nodeHandle)
869 {
870 int type = getNodeType(nodeHandle);
871 Node node = getNode(nodeHandle);
872 if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
873 {
874 // If this is a DTM text node, it may be made of multiple DOM text
875 // nodes -- including navigating into Entity References. DOM2DTM
876 // records the first node in the sequence and requires that we
877 // pick up the others when we retrieve the DTM node's value.
878 //
879 // %REVIEW% DOM Level 3 is expected to add a "whole text"
880 // retrieval method which performs this function for us.
881 FastStringBuffer buf = StringBufferPool.get();
882 while(node!=null)
883 {
884 buf.append(node.getNodeValue());
885 node=logicalNextDOMTextNode(node);
886 }
887 boolean b = buf.isWhitespace(0, buf.length());
888 StringBufferPool.free(buf);
889 return b;
890 }
891 return false;
892 }
893
894 /**
895 * Retrieve the text content of a DOM subtree, appending it into a
896 * user-supplied FastStringBuffer object. Note that attributes are
897 * not considered part of the content of an element.
898 * <p>
899 * There are open questions regarding whitespace stripping.
900 * Currently we make no special effort in that regard, since the standard
901 * DOM doesn't yet provide DTD-based information to distinguish
902 * whitespace-in-element-context from genuine #PCDATA. Note that we
903 * should probably also consider xml:space if/when we address this.
904 * DOM Level 3 may solve the problem for us.
905 * <p>
906 * %REVIEW% Actually, since this method operates on the DOM side of the
907 * fence rather than the DTM side, it SHOULDN'T do
908 * any special handling. The DOM does what the DOM does; if you want
909 * DTM-level abstractions, use DTM-level methods.
910 *
911 * @param node Node whose subtree is to be walked, gathering the
912 * contents of all Text or CDATASection nodes.
913 * @param buf FastStringBuffer into which the contents of the text
914 * nodes are to be concatenated.
915 */
916 protected static void getNodeData(Node node, FastStringBuffer buf)
917 {
918
919 switch (node.getNodeType())
920 {
921 case Node.DOCUMENT_FRAGMENT_NODE :
922 case Node.DOCUMENT_NODE :
923 case Node.ELEMENT_NODE :
924 {
925 for (Node child = node.getFirstChild(); null != child;
926 child = child.getNextSibling())
927 {
928 getNodeData(child, buf);
929 }
930 }
931 break;
932 case Node.TEXT_NODE :
933 case Node.CDATA_SECTION_NODE :
934 case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node
935 buf.append(node.getNodeValue());
936 break;
937 case Node.PROCESSING_INSTRUCTION_NODE :
938 // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
939 break;
940 default :
941 // ignore
942 break;
943 }
944 }
945
946 /**
947 * Given a node handle, return its DOM-style node name. This will
948 * include names such as #text or #document.
949 *
950 * @param nodeHandle the id of the node.
951 * @return String Name of this node, which may be an empty string.
952 * %REVIEW% Document when empty string is possible...
953 * %REVIEW-COMMENT% It should never be empty, should it?
954 */
955 public String getNodeName(int nodeHandle)
956 {
957
958 Node node = getNode(nodeHandle);
959
960 // Assume non-null.
961 return node.getNodeName();
962 }
963
964 /**
965 * Given a node handle, return the XPath node name. This should be
966 * the name as described by the XPath data model, NOT the DOM-style
967 * name.
968 *
969 * @param nodeHandle the id of the node.
970 * @return String Name of this node, which may be an empty string.
971 */
972 public String getNodeNameX(int nodeHandle)
973 {
974
975 String name;
976 short type = getNodeType(nodeHandle);
977
978 switch (type)
979 {
980 case DTM.NAMESPACE_NODE :
981 {
982 Node node = getNode(nodeHandle);
983
984 // assume not null.
985 name = node.getNodeName();
986 if(name.startsWith("xmlns:"))
987 {
988 name = QName.getLocalPart(name);
989 }
990 else if(name.equals("xmlns"))
991 {
992 name = "";
993 }
994 }
995 break;
996 case DTM.ATTRIBUTE_NODE :
997 case DTM.ELEMENT_NODE :
998 case DTM.ENTITY_REFERENCE_NODE :
999 case DTM.PROCESSING_INSTRUCTION_NODE :
1000 {
1001 Node node = getNode(nodeHandle);
1002
1003 // assume not null.
1004 name = node.getNodeName();
1005 }
1006 break;
1007 default :
1008 name = "";
1009 }
1010
1011 return name;
1012 }
1013
1014 /**
1015 * Given a node handle, return its XPath-style localname.
1016 * (As defined in Namespaces, this is the portion of the name after any
1017 * colon character).
1018 *
1019 * @param nodeHandle the id of the node.
1020 * @return String Local name of this node.
1021 */
1022 public String getLocalName(int nodeHandle)
1023 {
1024 if(JJK_NEWCODE)
1025 {
1026 int id=makeNodeIdentity(nodeHandle);
1027 if(NULL==id) return null;
1028 Node newnode=(Node)m_nodes.elementAt(id);
1029 String newname=newnode.getLocalName();
1030 if (null == newname)
1031 {
1032 // XSLT treats PIs, and possibly other things, as having QNames.
1033 String qname = newnode.getNodeName();
1034 if('#'==qname.charAt(0))
1035 {
1036 // Match old default for this function
1037 // This conversion may or may not be necessary
1038 newname="";
1039 }
1040 else
1041 {
1042 int index = qname.indexOf(':');
1043 newname = (index < 0) ? qname : qname.substring(index + 1);
1044 }
1045 }
1046 return newname;
1047 }
1048 else
1049 {
1050 String name;
1051 short type = getNodeType(nodeHandle);
1052 switch (type)
1053 {
1054 case DTM.ATTRIBUTE_NODE :
1055 case DTM.ELEMENT_NODE :
1056 case DTM.ENTITY_REFERENCE_NODE :
1057 case DTM.NAMESPACE_NODE :
1058 case DTM.PROCESSING_INSTRUCTION_NODE :
1059 {
1060 Node node = getNode(nodeHandle);
1061
1062 // assume not null.
1063 name = node.getLocalName();
1064
1065 if (null == name)
1066 {
1067 String qname = node.getNodeName();
1068 int index = qname.indexOf(':');
1069
1070 name = (index < 0) ? qname : qname.substring(index + 1);
1071 }
1072 }
1073 break;
1074 default :
1075 name = "";
1076 }
1077 return name;
1078 }
1079 }
1080
1081 /**
1082 * Given a namespace handle, return the prefix that the namespace decl is
1083 * mapping.
1084 * Given a node handle, return the prefix used to map to the namespace.
1085 *
1086 * <p> %REVIEW% Are you sure you want "" for no prefix? </p>
1087 * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p>
1088 *
1089 * @param nodeHandle the id of the node.
1090 * @return String prefix of this node's name, or "" if no explicit
1091 * namespace prefix was given.
1092 */
1093 public String getPrefix(int nodeHandle)
1094 {
1095
1096 String prefix;
1097 short type = getNodeType(nodeHandle);
1098
1099 switch (type)
1100 {
1101 case DTM.NAMESPACE_NODE :
1102 {
1103 Node node = getNode(nodeHandle);
1104
1105 // assume not null.
1106 String qname = node.getNodeName();
1107 int index = qname.indexOf(':');
1108
1109 prefix = (index < 0) ? "" : qname.substring(index + 1);
1110 }
1111 break;
1112 case DTM.ATTRIBUTE_NODE :
1113 case DTM.ELEMENT_NODE :
1114 {
1115 Node node = getNode(nodeHandle);
1116
1117 // assume not null.
1118 String qname = node.getNodeName();
1119 int index = qname.indexOf(':');
1120
1121 prefix = (index < 0) ? "" : qname.substring(0, index);
1122 }
1123 break;
1124 default :
1125 prefix = "";
1126 }
1127
1128 return prefix;
1129 }
1130
1131 /**
1132 * Given a node handle, return its DOM-style namespace URI
1133 * (As defined in Namespaces, this is the declared URI which this node's
1134 * prefix -- or default in lieu thereof -- was mapped to.)
1135 *
1136 * <p>%REVIEW% Null or ""? -sb</p>
1137 *
1138 * @param nodeHandle the id of the node.
1139 * @return String URI value of this node's namespace, or null if no
1140 * namespace was resolved.
1141 */
1142 public String getNamespaceURI(int nodeHandle)
1143 {
1144 if(JJK_NEWCODE)
1145 {
1146 int id=makeNodeIdentity(nodeHandle);
1147 if(id==NULL) return null;
1148 Node node=(Node)m_nodes.elementAt(id);
1149 return node.getNamespaceURI();
1150 }
1151 else
1152 {
1153 String nsuri;
1154 short type = getNodeType(nodeHandle);
1155
1156 switch (type)
1157 {
1158 case DTM.ATTRIBUTE_NODE :
1159 case DTM.ELEMENT_NODE :
1160 case DTM.ENTITY_REFERENCE_NODE :
1161 case DTM.NAMESPACE_NODE :
1162 case DTM.PROCESSING_INSTRUCTION_NODE :
1163 {
1164 Node node = getNode(nodeHandle);
1165
1166 // assume not null.
1167 nsuri = node.getNamespaceURI();
1168
1169 // %TBD% Handle DOM1?
1170 }
1171 break;
1172 default :
1173 nsuri = null;
1174 }
1175
1176 return nsuri;
1177 }
1178
1179 }
1180
1181 /** Utility function: Given a DOM Text node, determine whether it is
1182 * logically followed by another Text or CDATASection node. This may
1183 * involve traversing into Entity References.
1184 *
1185 * %REVIEW% DOM Level 3 is expected to add functionality which may
1186 * allow us to retire this.
1187 */
1188 private Node logicalNextDOMTextNode(Node n)
1189 {
1190 Node p=n.getNextSibling();
1191 if(p==null)
1192 {
1193 // Walk out of any EntityReferenceNodes that ended with text
1194 for(n=n.getParentNode();
1195 n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1196 n=n.getParentNode())
1197 {
1198 p=n.getNextSibling();
1199 if(p!=null)
1200 break;
1201 }
1202 }
1203 n=p;
1204 while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1205 {
1206 // Walk into any EntityReferenceNodes that start with text
1207 if(n.hasChildNodes())
1208 n=n.getFirstChild();
1209 else
1210 n=n.getNextSibling();
1211 }
1212 if(n!=null)
1213 {
1214 // Found a logical next sibling. Is it text?
1215 int ntype=n.getNodeType();
1216 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1217 n=null;
1218 }
1219 return n;
1220 }
1221
1222 /**
1223 * Given a node handle, return its node value. This is mostly
1224 * as defined by the DOM, but may ignore some conveniences.
1225 * <p>
1226 *
1227 * @param nodeHandle The node id.
1228 * @return String Value of this node, or null if not
1229 * meaningful for this node type.
1230 */
1231 public String getNodeValue(int nodeHandle)
1232 {
1233 // The _type(nodeHandle) call was taking the lion's share of our
1234 // time, and was wrong anyway since it wasn't coverting handle to
1235 // identity. Inlined it.
1236 int type = _exptype(makeNodeIdentity(nodeHandle));
1237 type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1238
1239 if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1240 return getNode(nodeHandle).getNodeValue();
1241
1242 // If this is a DTM text node, it may be made of multiple DOM text
1243 // nodes -- including navigating into Entity References. DOM2DTM
1244 // records the first node in the sequence and requires that we
1245 // pick up the others when we retrieve the DTM node's value.
1246 //
1247 // %REVIEW% DOM Level 3 is expected to add a "whole text"
1248 // retrieval method which performs this function for us.
1249 Node node = getNode(nodeHandle);
1250 Node n=logicalNextDOMTextNode(node);
1251 if(n==null)
1252 return node.getNodeValue();
1253
1254 FastStringBuffer buf = StringBufferPool.get();
1255 buf.append(node.getNodeValue());
1256 while(n!=null)
1257 {
1258 buf.append(n.getNodeValue());
1259 n=logicalNextDOMTextNode(n);
1260 }
1261 String s = (buf.length() > 0) ? buf.toString() : "";
1262 StringBufferPool.free(buf);
1263 return s;
1264 }
1265
1266 /**
1267 * A document type declaration information item has the following properties:
1268 *
1269 * 1. [system identifier] The system identifier of the external subset, if
1270 * it exists. Otherwise this property has no value.
1271 *
1272 * @return the system identifier String object, or null if there is none.
1273 */
1274 public String getDocumentTypeDeclarationSystemIdentifier()
1275 {
1276
1277 Document doc;
1278
1279 if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1280 doc = (Document) m_root;
1281 else
1282 doc = m_root.getOwnerDocument();
1283
1284 if (null != doc)
1285 {
1286 DocumentType dtd = doc.getDoctype();
1287
1288 if (null != dtd)
1289 {
1290 return dtd.getSystemId();
1291 }
1292 }
1293
1294 return null;
1295 }
1296
1297 /**
1298 * Return the public identifier of the external subset,
1299 * normalized as described in 4.2.2 External Entities [XML]. If there is
1300 * no external subset or if it has no public identifier, this property
1301 * has no value.
1302 *
1303 * @return the public identifier String object, or null if there is none.
1304 */
1305 public String getDocumentTypeDeclarationPublicIdentifier()
1306 {
1307
1308 Document doc;
1309
1310 if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1311 doc = (Document) m_root;
1312 else
1313 doc = m_root.getOwnerDocument();
1314
1315 if (null != doc)
1316 {
1317 DocumentType dtd = doc.getDoctype();
1318
1319 if (null != dtd)
1320 {
1321 return dtd.getPublicId();
1322 }
1323 }
1324
1325 return null;
1326 }
1327
1328 /**
1329 * Returns the <code>Element</code> whose <code>ID</code> is given by
1330 * <code>elementId</code>. If no such element exists, returns
1331 * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1332 * has this <code>ID</code>. Attributes (including those
1333 * with the name "ID") are not of type ID unless so defined by DTD/Schema
1334 * information available to the DTM implementation.
1335 * Implementations that do not know whether attributes are of type ID or
1336 * not are expected to return <code>DTM.NULL</code>.
1337 *
1338 * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1339 * and this operation searches only within a single document, right?
1340 * Wouldn't want collisions between DTMs in the same process.</p>
1341 *
1342 * @param elementId The unique <code>id</code> value for an element.
1343 * @return The handle of the matching element.
1344 */
1345 public int getElementById(String elementId)
1346 {
1347
1348 Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1349 ? (Document) m_root : m_root.getOwnerDocument();
1350
1351 if(null != doc)
1352 {
1353 Node elem = doc.getElementById(elementId);
1354 if(null != elem)
1355 {
1356 int elemHandle = getHandleFromNode(elem);
1357
1358 if(DTM.NULL == elemHandle)
1359 {
1360 int identity = m_nodes.size()-1;
1361 while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1362 {
1363 Node node = getNode(identity);
1364 if(node == elem)
1365 {
1366 elemHandle = getHandleFromNode(elem);
1367 break;
1368 }
1369 }
1370 }
1371
1372 return elemHandle;
1373 }
1374
1375 }
1376 return DTM.NULL;
1377 }
1378
1379 /**
1380 * The getUnparsedEntityURI function returns the URI of the unparsed
1381 * entity with the specified name in the same document as the context
1382 * node (see [3.3 Unparsed Entities]). It returns the empty string if
1383 * there is no such entity.
1384 * <p>
1385 * XML processors may choose to use the System Identifier (if one
1386 * is provided) to resolve the entity, rather than the URI in the
1387 * Public Identifier. The details are dependent on the processor, and
1388 * we would have to support some form of plug-in resolver to handle
1389 * this properly. Currently, we simply return the System Identifier if
1390 * present, and hope that it a usable URI or that our caller can
1391 * map it to one.
1392 * TODO: Resolve Public Identifiers... or consider changing function name.
1393 * <p>
1394 * If we find a relative URI
1395 * reference, XML expects it to be resolved in terms of the base URI
1396 * of the document. The DOM doesn't do that for us, and it isn't
1397 * entirely clear whether that should be done here; currently that's
1398 * pushed up to a higher level of our application. (Note that DOM Level
1399 * 1 didn't store the document's base URI.)
1400 * TODO: Consider resolving Relative URIs.
1401 * <p>
1402 * (The DOM's statement that "An XML processor may choose to
1403 * completely expand entities before the structure model is passed
1404 * to the DOM" refers only to parsed entities, not unparsed, and hence
1405 * doesn't affect this function.)
1406 *
1407 * @param name A string containing the Entity Name of the unparsed
1408 * entity.
1409 *
1410 * @return String containing the URI of the Unparsed Entity, or an
1411 * empty string if no such entity exists.
1412 */
1413 public String getUnparsedEntityURI(String name)
1414 {
1415
1416 String url = "";
1417 Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1418 ? (Document) m_root : m_root.getOwnerDocument();
1419
1420 if (null != doc)
1421 {
1422 DocumentType doctype = doc.getDoctype();
1423
1424 if (null != doctype)
1425 {
1426 NamedNodeMap entities = doctype.getEntities();
1427 if(null == entities)
1428 return url;
1429 Entity entity = (Entity) entities.getNamedItem(name);
1430 if(null == entity)
1431 return url;
1432
1433 String notationName = entity.getNotationName();
1434
1435 if (null != notationName) // then it's unparsed
1436 {
1437 // The draft says: "The XSLT processor may use the public
1438 // identifier to generate a URI for the entity instead of the URI
1439 // specified in the system identifier. If the XSLT processor does
1440 // not use the public identifier to generate the URI, it must use
1441 // the system identifier; if the system identifier is a relative
1442 // URI, it must be resolved into an absolute URI using the URI of
1443 // the resource containing the entity declaration as the base
1444 // URI [RFC2396]."
1445 // So I'm falling a bit short here.
1446 url = entity.getSystemId();
1447
1448 if (null == url)
1449 {
1450 url = entity.getPublicId();
1451 }
1452 else
1453 {
1454 // This should be resolved to an absolute URL, but that's hard
1455 // to do from here.
1456 }
1457 }
1458 }
1459 }
1460
1461 return url;
1462 }
1463
1464 /**
1465 * 5. [specified] A flag indicating whether this attribute was actually
1466 * specified in the start-tag of its element, or was defaulted from the
1467 * DTD.
1468 *
1469 * @param attributeHandle the attribute handle
1470 * @return <code>true</code> if the attribute was specified;
1471 * <code>false</code> if it was defaulted.
1472 */
1473 public boolean isAttributeSpecified(int attributeHandle)
1474 {
1475 int type = getNodeType(attributeHandle);
1476
1477 if (DTM.ATTRIBUTE_NODE == type)
1478 {
1479 Attr attr = (Attr)getNode(attributeHandle);
1480 return attr.getSpecified();
1481 }
1482 return false;
1483 }
1484
1485 /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1486 * we're wrapped around an existing DOM.
1487 *
1488 * @param source The IncrementalSAXSource that we want to recieve events from
1489 * on demand.
1490 */
1491 public void setIncrementalSAXSource(IncrementalSAXSource source)
1492 {
1493 }
1494
1495 /** getContentHandler returns "our SAX builder" -- the thing that
1496 * someone else should send SAX events to in order to extend this
1497 * DTM model.
1498 *
1499 * @return null if this model doesn't respond to SAX events,
1500 * "this" if the DTM object has a built-in SAX ContentHandler,
1501 * the IncrmentalSAXSource if we're bound to one and should receive
1502 * the SAX stream via it for incremental build purposes...
1503 * */
1504 public org.xml.sax.ContentHandler getContentHandler()
1505 {
1506 return null;
1507 }
1508
1509 /**
1510 * Return this DTM's lexical handler.
1511 *
1512 * %REVIEW% Should this return null if constrution already done/begun?
1513 *
1514 * @return null if this model doesn't respond to lexical SAX events,
1515 * "this" if the DTM object has a built-in SAX ContentHandler,
1516 * the IncrementalSAXSource if we're bound to one and should receive
1517 * the SAX stream via it for incremental build purposes...
1518 */
1519 public org.xml.sax.ext.LexicalHandler getLexicalHandler()
1520 {
1521
1522 return null;
1523 }
1524
1525
1526 /**
1527 * Return this DTM's EntityResolver.
1528 *
1529 * @return null if this model doesn't respond to SAX entity ref events.
1530 */
1531 public org.xml.sax.EntityResolver getEntityResolver()
1532 {
1533
1534 return null;
1535 }
1536
1537 /**
1538 * Return this DTM's DTDHandler.
1539 *
1540 * @return null if this model doesn't respond to SAX dtd events.
1541 */
1542 public org.xml.sax.DTDHandler getDTDHandler()
1543 {
1544
1545 return null;
1546 }
1547
1548 /**
1549 * Return this DTM's ErrorHandler.
1550 *
1551 * @return null if this model doesn't respond to SAX error events.
1552 */
1553 public org.xml.sax.ErrorHandler getErrorHandler()
1554 {
1555
1556 return null;
1557 }
1558
1559 /**
1560 * Return this DTM's DeclHandler.
1561 *
1562 * @return null if this model doesn't respond to SAX Decl events.
1563 */
1564 public org.xml.sax.ext.DeclHandler getDeclHandler()
1565 {
1566
1567 return null;
1568 }
1569
1570 /** @return true iff we're building this model incrementally (eg
1571 * we're partnered with a IncrementalSAXSource) and thus require that the
1572 * transformation and the parse run simultaneously. Guidance to the
1573 * DTMManager.
1574 * */
1575 public boolean needsTwoThreads()
1576 {
1577 return false;
1578 }
1579
1580 // ========== Direct SAX Dispatch, for optimization purposes ========
1581
1582 /**
1583 * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1584 * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1585 * the definition of <CODE>S</CODE></A> for details.
1586 * @param ch Character to check as XML whitespace.
1587 * @return =true if <var>ch</var> is XML whitespace; otherwise =false.
1588 */
1589 private static boolean isSpace(char ch)
1590 {
1591 return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now.
1592 }
1593
1594 /**
1595 * Directly call the
1596 * characters method on the passed ContentHandler for the
1597 * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1598 * for the definition of a node's string-value). Multiple calls to the
1599 * ContentHandler's characters methods may well occur for a single call to
1600 * this method.
1601 *
1602 * @param nodeHandle The node ID.
1603 * @param ch A non-null reference to a ContentHandler.
1604 *
1605 * @throws org.xml.sax.SAXException
1606 */
1607 public void dispatchCharactersEvents(
1608 int nodeHandle, org.xml.sax.ContentHandler ch,
1609 boolean normalize)
1610 throws org.xml.sax.SAXException
1611 {
1612 if(normalize)
1613 {
1614 XMLString str = getStringValue(nodeHandle);
1615 str = str.fixWhiteSpace(true, true, false);
1616 str.dispatchCharactersEvents(ch);
1617 }
1618 else
1619 {
1620 int type = getNodeType(nodeHandle);
1621 Node node = getNode(nodeHandle);
1622 dispatchNodeData(node, ch, 0);
1623 // Text coalition -- a DTM text node may represent multiple
1624 // DOM nodes.
1625 if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1626 {
1627 while( null != (node=logicalNextDOMTextNode(node)) )
1628 {
1629 dispatchNodeData(node, ch, 0);
1630 }
1631 }
1632 }
1633 }
1634
1635 /**
1636 * Retrieve the text content of a DOM subtree, appending it into a
1637 * user-supplied FastStringBuffer object. Note that attributes are
1638 * not considered part of the content of an element.
1639 * <p>
1640 * There are open questions regarding whitespace stripping.
1641 * Currently we make no special effort in that regard, since the standard
1642 * DOM doesn't yet provide DTD-based information to distinguish
1643 * whitespace-in-element-context from genuine #PCDATA. Note that we
1644 * should probably also consider xml:space if/when we address this.
1645 * DOM Level 3 may solve the problem for us.
1646 * <p>
1647 * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1648 * routine _shouldn't_ perform any processing beyond what the DOM already
1649 * does, and that whitespace stripping and so on belong at the DTM level.
1650 * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1651 *
1652 * @param node Node whose subtree is to be walked, gathering the
1653 * contents of all Text or CDATASection nodes.
1654 */
1655 protected static void dispatchNodeData(Node node,
1656 org.xml.sax.ContentHandler ch,
1657 int depth)
1658 throws org.xml.sax.SAXException
1659 {
1660
1661 switch (node.getNodeType())
1662 {
1663 case Node.DOCUMENT_FRAGMENT_NODE :
1664 case Node.DOCUMENT_NODE :
1665 case Node.ELEMENT_NODE :
1666 {
1667 for (Node child = node.getFirstChild(); null != child;
1668 child = child.getNextSibling())
1669 {
1670 dispatchNodeData(child, ch, depth+1);
1671 }
1672 }
1673 break;
1674 case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1675 case Node.COMMENT_NODE :
1676 if(0 != depth)
1677 break;
1678 // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1679 // to perform Text Coalition. That should only be done in DTM space.
1680 case Node.TEXT_NODE :
1681 case Node.CDATA_SECTION_NODE :
1682 case Node.ATTRIBUTE_NODE :
1683 String str = node.getNodeValue();
1684 if(ch instanceof CharacterNodeHandler)
1685 {
1686 ((CharacterNodeHandler)ch).characters(node);
1687 }
1688 else
1689 {
1690 ch.characters(str.toCharArray(), 0, str.length());
1691 }
1692 break;
1693 // /* case Node.PROCESSING_INSTRUCTION_NODE :
1694 // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1695 // break; */
1696 default :
1697 // ignore
1698 break;
1699 }
1700 }
1701
1702 TreeWalker m_walker = new TreeWalker(null);
1703
1704 /**
1705 * Directly create SAX parser events from a subtree.
1706 *
1707 * @param nodeHandle The node ID.
1708 * @param ch A non-null reference to a ContentHandler.
1709 *
1710 * @throws org.xml.sax.SAXException
1711 */
1712 public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1713 throws org.xml.sax.SAXException
1714 {
1715 TreeWalker treeWalker = m_walker;
1716 ContentHandler prevCH = treeWalker.getContentHandler();
1717
1718 if(null != prevCH)
1719 {
1720 treeWalker = new TreeWalker(null);
1721 }
1722 treeWalker.setContentHandler(ch);
1723
1724 try
1725 {
1726 Node node = getNode(nodeHandle);
1727 treeWalker.traverseFragment(node);
1728 }
1729 finally
1730 {
1731 treeWalker.setContentHandler(null);
1732 }
1733 }
1734
/**
 * Optional callback for handlers that want character data as the DOM
 * node itself. When the ContentHandler passed to dispatchNodeData also
 * implements this interface, this method is called with the node
 * instead of the handler's array-based characters method.
 */
public interface CharacterNodeHandler
{
  /**
   * Receive a DOM node whose value is to be treated as character data.
   *
   * @param node the DOM node carrying the character data.
   *
   * @throws org.xml.sax.SAXException
   */
  void characters(Node node)
    throws org.xml.sax.SAXException;
}
1740
1741 /**
1742 * For the moment all the run time properties are ignored by this
1743 * class.
1744 *
1745 * @param property a <code>String</code> value
1746 * @param value an <code>Object</code> value
1747 */
1748 public void setProperty(String property, Object value)
1749 {
1750 }
1751
1752 /**
1753 * No source information is available for DOM2DTM, so return
1754 * <code>null</code> here.
1755 *
1756 * @param node an <code>int</code> value
1757 * @return null
1758 */
1759 public SourceLocator getSourceLocatorFor(int node)
1760 {
1761 return null;
1762 }
1763
1764 }
1765
1766