001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: DOMHelper.java 468655 2006-10-28 07:12:06Z minchau $
020 */
021 package org.apache.xml.utils;
022
023 import java.util.Hashtable;
024 import java.util.Vector;
025
026 import javax.xml.XMLConstants;
027 import javax.xml.parsers.DocumentBuilder;
028 import javax.xml.parsers.DocumentBuilderFactory;
029 import javax.xml.parsers.ParserConfigurationException;
030
031 import org.apache.xml.dtm.ref.DTMNodeProxy;
032 import org.apache.xml.res.XMLErrorResources;
033 import org.apache.xml.res.XMLMessages;
034
035 import org.w3c.dom.Attr;
036 import org.w3c.dom.DOMImplementation;
037 import org.w3c.dom.Document;
038 import org.w3c.dom.DocumentType;
039 import org.w3c.dom.Element;
040 import org.w3c.dom.Entity;
041 import org.w3c.dom.NamedNodeMap;
042 import org.w3c.dom.Node;
043 import org.w3c.dom.Text;
044
045 /**
046 * @deprecated Since the introduction of the DTM, this class will be removed.
047 * This class provides a front-end to DOM implementations, providing
048 * a number of utility functions that either aren't yet standardized
049 * by the DOM spec or that are defined in optional DOM modules and
050 * hence may not be present in all DOMs.
051 */
052 public class DOMHelper
053 {
054
055 /**
056 * DOM Level 1 did not have a standard mechanism for creating a new
057 * Document object. This function provides a DOM-implementation-independent
058 * abstraction for that for that concept. It's typically used when
059 * outputting a new DOM as the result of an operation.
060 * <p>
061 * TODO: This isn't directly compatable with DOM Level 2.
062 * The Level 2 createDocument call also creates the root
063 * element, and thus requires that you know what that element will be
064 * before creating the Document. We should think about whether we want
065 * to change this code, and the callers, so we can use the DOM's own
066 * method. (It's also possible that DOM Level 3 may relax this
067 * sequence, but you may give up some intelligence in the DOM by
068 * doing so; the intent was that knowing the document type and root
069 * element might let the DOM automatically switch to a specialized
070 * subclass for particular kinds of documents.)
071 *
072 * @param isSecureProcessing state of the secure processing feature.
073 * @return The newly created DOM Document object, with no children, or
074 * null if we can't find a DOM implementation that permits creating
075 * new empty Documents.
076 */
077 public static Document createDocument(boolean isSecureProcessing)
078 {
079
080 try
081 {
082
083 // Use an implementation of the JAVA API for XML Parsing 1.0 to
084 // create a DOM Document node to contain the result.
085 DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
086
087 dfactory.setNamespaceAware(true);
088 dfactory.setValidating(true);
089
090 if (isSecureProcessing)
091 {
092 try
093 {
094 dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
095 }
096 catch (ParserConfigurationException pce) {}
097 }
098
099 DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
100 Document outNode = docBuilder.newDocument();
101
102 return outNode;
103 }
104 catch (ParserConfigurationException pce)
105 {
106 throw new RuntimeException(
107 XMLMessages.createXMLMessage(
108 XMLErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED, null)); //"createDocument() not supported in XPathContext!");
109
110 // return null;
111 }
112 }
113
114 /**
115 * DOM Level 1 did not have a standard mechanism for creating a new
116 * Document object. This function provides a DOM-implementation-independent
117 * abstraction for that for that concept. It's typically used when
118 * outputting a new DOM as the result of an operation.
119 *
120 * @return The newly created DOM Document object, with no children, or
121 * null if we can't find a DOM implementation that permits creating
122 * new empty Documents.
123 */
124 public static Document createDocument()
125 {
126 return createDocument(false);
127 }
128
129 /**
130 * Tells, through the combination of the default-space attribute
131 * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the
132 * xml:space attribute, whether or not extra whitespace should be stripped
133 * from the node. Literal elements from template elements should
134 * <em>not</em> be tested with this function.
135 * @param textNode A text node from the source tree.
136 * @return true if the text node should be stripped of extra whitespace.
137 *
138 * @throws javax.xml.transform.TransformerException
139 * @xsl.usage advanced
140 */
141 public boolean shouldStripSourceNode(Node textNode)
142 throws javax.xml.transform.TransformerException
143 {
144
145 // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode);
146 return false;
147 }
148
149 /**
150 * Supports the XPath function GenerateID by returning a unique
151 * identifier string for any given DOM Node.
152 * <p>
153 * Warning: The base implementation uses the Node object's hashCode(),
154 * which is NOT guaranteed to be unique. If that method hasn't been
155 * overridden in this DOM ipmlementation, most Java implementions will
156 * derive it from the object's address and should be OK... but if
157 * your DOM uses a different definition of hashCode (eg hashing the
158 * contents of the subtree), or if your DOM may have multiple objects
159 * that represent a single Node in the data structure (eg via proxying),
160 * you may need to find another way to assign a unique identifier.
161 * <p>
162 * Also, be aware that if nodes are destroyed and recreated, there is
163 * an open issue regarding whether an ID may be reused. Currently
164 * we're assuming that the input document is stable for the duration
165 * of the XPath/XSLT operation, so this shouldn't arise in this context.
166 * <p>
167 * (DOM Level 3 is investigating providing a unique node "key", but
168 * that won't help Level 1 and Level 2 implementations.)
169 *
170 * @param node whose identifier you want to obtain
171 *
172 * @return a string which should be different for every Node object.
173 */
174 public String getUniqueID(Node node)
175 {
176 return "N" + Integer.toHexString(node.hashCode()).toUpperCase();
177 }
178
179 /**
180 * Figure out whether node2 should be considered as being later
181 * in the document than node1, in Document Order as defined
182 * by the XPath model. This may not agree with the ordering defined
183 * by other XML applications.
184 * <p>
185 * There are some cases where ordering isn't defined, and neither are
186 * the results of this function -- though we'll generally return true.
187 *
188 * TODO: Make sure this does the right thing with attribute nodes!!!
189 *
190 * @param node1 DOM Node to perform position comparison on.
191 * @param node2 DOM Node to perform position comparison on .
192 *
193 * @return false if node2 comes before node1, otherwise return true.
194 * You can think of this as
195 * <code>(node1.documentOrderPosition <= node2.documentOrderPosition)</code>.
196 */
197 public static boolean isNodeAfter(Node node1, Node node2)
198 {
199 if (node1 == node2 || isNodeTheSame(node1, node2))
200 return true;
201
202 // Default return value, if there is no defined ordering
203 boolean isNodeAfter = true;
204
205 Node parent1 = getParentOfNode(node1);
206 Node parent2 = getParentOfNode(node2);
207
208 // Optimize for most common case
209 if (parent1 == parent2 || isNodeTheSame(parent1, parent2)) // then we know they are siblings
210 {
211 if (null != parent1)
212 isNodeAfter = isNodeAfterSibling(parent1, node1, node2);
213 else
214 {
215 // If both parents are null, ordering is not defined.
216 // We're returning a value in lieu of throwing an exception.
217 // Not a case we expect to arise in XPath, but beware if you
218 // try to reuse this method.
219
220 // We can just fall through in this case, which allows us
221 // to hit the debugging code at the end of the function.
222 //return isNodeAfter;
223 }
224 }
225 else
226 {
227
228 // General strategy: Figure out the lengths of the two
229 // ancestor chains, reconcile the lengths, and look for
230 // the lowest common ancestor. If that ancestor is one of
231 // the nodes being compared, it comes before the other.
232 // Otherwise perform a sibling compare.
233 //
234 // NOTE: If no common ancestor is found, ordering is undefined
235 // and we return the default value of isNodeAfter.
236
237 // Count parents in each ancestor chain
238 int nParents1 = 2, nParents2 = 2; // include node & parent obtained above
239
240 while (parent1 != null)
241 {
242 nParents1++;
243
244 parent1 = getParentOfNode(parent1);
245 }
246
247 while (parent2 != null)
248 {
249 nParents2++;
250
251 parent2 = getParentOfNode(parent2);
252 }
253
254 // Initially assume scan for common ancestor starts with
255 // the input nodes.
256 Node startNode1 = node1, startNode2 = node2;
257
258 // If one ancestor chain is longer, adjust its start point
259 // so we're comparing at the same depths
260 if (nParents1 < nParents2)
261 {
262 // Adjust startNode2 to depth of startNode1
263 int adjust = nParents2 - nParents1;
264
265 for (int i = 0; i < adjust; i++)
266 {
267 startNode2 = getParentOfNode(startNode2);
268 }
269 }
270 else if (nParents1 > nParents2)
271 {
272 // adjust startNode1 to depth of startNode2
273 int adjust = nParents1 - nParents2;
274
275 for (int i = 0; i < adjust; i++)
276 {
277 startNode1 = getParentOfNode(startNode1);
278 }
279 }
280
281 Node prevChild1 = null, prevChild2 = null; // so we can "back up"
282
283 // Loop up the ancestor chain looking for common parent
284 while (null != startNode1)
285 {
286 if (startNode1 == startNode2 || isNodeTheSame(startNode1, startNode2)) // common parent?
287 {
288 if (null == prevChild1) // first time in loop?
289 {
290
291 // Edge condition: one is the ancestor of the other.
292 isNodeAfter = (nParents1 < nParents2) ? true : false;
293
294 break; // from while loop
295 }
296 else
297 {
298 // Compare ancestors below lowest-common as siblings
299 isNodeAfter = isNodeAfterSibling(startNode1, prevChild1,
300 prevChild2);
301
302 break; // from while loop
303 }
304 } // end if(startNode1 == startNode2)
305
306 // Move up one level and try again
307 prevChild1 = startNode1;
308 startNode1 = getParentOfNode(startNode1);
309 prevChild2 = startNode2;
310 startNode2 = getParentOfNode(startNode2);
311 } // end while(parents exist to examine)
312 } // end big else (not immediate siblings)
313
314 // WARNING: The following diagnostic won't report the early
315 // "same node" case. Fix if/when needed.
316
317 /* -- please do not remove... very useful for diagnostics --
318 System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+
319 ", node2 = "+node2.getNodeName()
320 +"("+node2.getNodeType()+")"+
321 ", isNodeAfter = "+isNodeAfter); */
322 return isNodeAfter;
323 } // end isNodeAfter(Node node1, Node node2)
324
325 /**
326 * Use DTMNodeProxy to determine whether two nodes are the same.
327 *
328 * @param node1 The first DOM node to compare.
329 * @param node2 The second DOM node to compare.
330 * @return true if the two nodes are the same.
331 */
332 public static boolean isNodeTheSame(Node node1, Node node2)
333 {
334 if (node1 instanceof DTMNodeProxy && node2 instanceof DTMNodeProxy)
335 return ((DTMNodeProxy)node1).equals((DTMNodeProxy)node2);
336 else
337 return (node1 == node2);
338 }
339
340 /**
341 * Figure out if child2 is after child1 in document order.
342 * <p>
343 * Warning: Some aspects of "document order" are not well defined.
344 * For example, the order of attributes is considered
345 * meaningless in XML, and the order reported by our model will
346 * be consistant for a given invocation but may not
347 * match that of either the source file or the serialized output.
348 *
349 * @param parent Must be the parent of both child1 and child2.
350 * @param child1 Must be the child of parent and not equal to child2.
351 * @param child2 Must be the child of parent and not equal to child1.
352 * @return true if child 2 is after child1 in document order.
353 */
354 private static boolean isNodeAfterSibling(Node parent, Node child1,
355 Node child2)
356 {
357
358 boolean isNodeAfterSibling = false;
359 short child1type = child1.getNodeType();
360 short child2type = child2.getNodeType();
361
362 if ((Node.ATTRIBUTE_NODE != child1type)
363 && (Node.ATTRIBUTE_NODE == child2type))
364 {
365
366 // always sort attributes before non-attributes.
367 isNodeAfterSibling = false;
368 }
369 else if ((Node.ATTRIBUTE_NODE == child1type)
370 && (Node.ATTRIBUTE_NODE != child2type))
371 {
372
373 // always sort attributes before non-attributes.
374 isNodeAfterSibling = true;
375 }
376 else if (Node.ATTRIBUTE_NODE == child1type)
377 {
378 NamedNodeMap children = parent.getAttributes();
379 int nNodes = children.getLength();
380 boolean found1 = false, found2 = false;
381
382 // Count from the start until we find one or the other.
383 for (int i = 0; i < nNodes; i++)
384 {
385 Node child = children.item(i);
386
387 if (child1 == child || isNodeTheSame(child1, child))
388 {
389 if (found2)
390 {
391 isNodeAfterSibling = false;
392
393 break;
394 }
395
396 found1 = true;
397 }
398 else if (child2 == child || isNodeTheSame(child2, child))
399 {
400 if (found1)
401 {
402 isNodeAfterSibling = true;
403
404 break;
405 }
406
407 found2 = true;
408 }
409 }
410 }
411 else
412 {
413 // TODO: Check performance of alternate solution:
414 // There are two choices here: Count from the start of
415 // the document until we find one or the other, or count
416 // from one until we find or fail to find the other.
417 // Either can wind up scanning all the siblings in the worst
418 // case, which on a wide document can be a lot of work but
419 // is more typically is a short list.
420 // Scanning from the start involves two tests per iteration,
421 // but it isn't clear that scanning from the middle doesn't
422 // yield more iterations on average.
423 // We should run some testcases.
424 Node child = parent.getFirstChild();
425 boolean found1 = false, found2 = false;
426
427 while (null != child)
428 {
429
430 // Node child = children.item(i);
431 if (child1 == child || isNodeTheSame(child1, child))
432 {
433 if (found2)
434 {
435 isNodeAfterSibling = false;
436
437 break;
438 }
439
440 found1 = true;
441 }
442 else if (child2 == child || isNodeTheSame(child2, child))
443 {
444 if (found1)
445 {
446 isNodeAfterSibling = true;
447
448 break;
449 }
450
451 found2 = true;
452 }
453
454 child = child.getNextSibling();
455 }
456 }
457
458 return isNodeAfterSibling;
459 } // end isNodeAfterSibling(Node parent, Node child1, Node child2)
460
461 //==========================================================
462 // SECTION: Namespace resolution
463 //==========================================================
464
465 /**
466 * Get the depth level of this node in the tree (equals 1 for
467 * a parentless node).
468 *
469 * @param n Node to be examined.
470 * @return the number of ancestors, plus one
471 * @xsl.usage internal
472 */
473 public short getLevel(Node n)
474 {
475
476 short level = 1;
477
478 while (null != (n = getParentOfNode(n)))
479 {
480 level++;
481 }
482
483 return level;
484 }
485
486 /**
487 * Given an XML Namespace prefix and a context in which the prefix
488 * is to be evaluated, return the Namespace Name this prefix was
489 * bound to. Note that DOM Level 3 is expected to provide a version of
490 * this which deals with the DOM's "early binding" behavior.
491 *
492 * Default handling:
493 *
494 * @param prefix String containing namespace prefix to be resolved,
495 * without the ':' which separates it from the localname when used
496 * in a Node Name. The empty sting signifies the default namespace
497 * at this point in the document.
498 * @param namespaceContext Element which provides context for resolution.
499 * (We could extend this to work for other nodes by first seeking their
500 * nearest Element ancestor.)
501 *
502 * @return a String containing the Namespace URI which this prefix
503 * represents in the specified context.
504 */
505 public String getNamespaceForPrefix(String prefix, Element namespaceContext)
506 {
507
508 int type;
509 Node parent = namespaceContext;
510 String namespace = null;
511
512 if (prefix.equals("xml"))
513 {
514 namespace = QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec
515 }
516 else if(prefix.equals("xmlns"))
517 {
518 // Hardcoded in the DOM spec, expected to be adopted by
519 // Namespace spec. NOTE: Namespace declarations _must_ use
520 // the xmlns: prefix; other prefixes declared as belonging
521 // to this namespace will not be recognized and should
522 // probably be rejected by parsers as erroneous declarations.
523 namespace = "http://www.w3.org/2000/xmlns/";
524 }
525 else
526 {
527 // Attribute name for this prefix's declaration
528 String declname=(prefix=="")
529 ? "xmlns"
530 : "xmlns:"+prefix;
531
532 // Scan until we run out of Elements or have resolved the namespace
533 while ((null != parent) && (null == namespace)
534 && (((type = parent.getNodeType()) == Node.ELEMENT_NODE)
535 || (type == Node.ENTITY_REFERENCE_NODE)))
536 {
537 if (type == Node.ELEMENT_NODE)
538 {
539
540 // Look for the appropriate Namespace Declaration attribute,
541 // either "xmlns:prefix" or (if prefix is "") "xmlns".
542 // TODO: This does not handle "implicit declarations"
543 // which may be created when the DOM is edited. DOM Level
544 // 3 will define how those should be interpreted. But
545 // this issue won't arise in freshly-parsed DOMs.
546
547 // NOTE: declname is set earlier, outside the loop.
548 Attr attr=((Element)parent).getAttributeNode(declname);
549 if(attr!=null)
550 {
551 namespace = attr.getNodeValue();
552 break;
553 }
554 }
555
556 parent = getParentOfNode(parent);
557 }
558 }
559
560 return namespace;
561 }
562
563 /**
564 * An experiment for the moment.
565 */
566 Hashtable m_NSInfos = new Hashtable();
567
568 /** Object to put into the m_NSInfos table that tells that a node has not been
569 * processed, but has xmlns namespace decls. */
570 protected static final NSInfo m_NSInfoUnProcWithXMLNS = new NSInfo(false,
571 true);
572
573 /** Object to put into the m_NSInfos table that tells that a node has not been
574 * processed, but has no xmlns namespace decls. */
575 protected static final NSInfo m_NSInfoUnProcWithoutXMLNS = new NSInfo(false,
576 false);
577
578 /** Object to put into the m_NSInfos table that tells that a node has not been
579 * processed, and has no xmlns namespace decls, and has no ancestor decls. */
580 protected static final NSInfo m_NSInfoUnProcNoAncestorXMLNS =
581 new NSInfo(false, false, NSInfo.ANCESTORNOXMLNS);
582
583 /** Object to put into the m_NSInfos table that tells that a node has been
584 * processed, and has xmlns namespace decls. */
585 protected static final NSInfo m_NSInfoNullWithXMLNS = new NSInfo(true,
586 true);
587
588 /** Object to put into the m_NSInfos table that tells that a node has been
589 * processed, and has no xmlns namespace decls. */
590 protected static final NSInfo m_NSInfoNullWithoutXMLNS = new NSInfo(true,
591 false);
592
593 /** Object to put into the m_NSInfos table that tells that a node has been
594 * processed, and has no xmlns namespace decls. and has no ancestor decls. */
595 protected static final NSInfo m_NSInfoNullNoAncestorXMLNS =
596 new NSInfo(true, false, NSInfo.ANCESTORNOXMLNS);
597
598 /** Vector of node (odd indexes) and NSInfos (even indexes) that tell if
599 * the given node is a candidate for ancestor namespace processing. */
600 protected Vector m_candidateNoAncestorXMLNS = new Vector();
601
602 /**
603 * Returns the namespace of the given node. Differs from simply getting
604 * the node's prefix and using getNamespaceForPrefix in that it attempts
605 * to cache some of the data in NSINFO objects, to avoid repeated lookup.
606 * TODO: Should we consider moving that logic into getNamespaceForPrefix?
607 *
608 * @param n Node to be examined.
609 *
610 * @return String containing the Namespace Name (uri) for this node.
611 * Note that this is undefined for any nodes other than Elements and
612 * Attributes.
613 */
614 public String getNamespaceOfNode(Node n)
615 {
616
617 String namespaceOfPrefix;
618 boolean hasProcessedNS;
619 NSInfo nsInfo;
620 short ntype = n.getNodeType();
621
622 if (Node.ATTRIBUTE_NODE != ntype)
623 {
624 Object nsObj = m_NSInfos.get(n); // return value
625
626 nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
627 hasProcessedNS = (nsInfo == null) ? false : nsInfo.m_hasProcessedNS;
628 }
629 else
630 {
631 hasProcessedNS = false;
632 nsInfo = null;
633 }
634
635 if (hasProcessedNS)
636 {
637 namespaceOfPrefix = nsInfo.m_namespace;
638 }
639 else
640 {
641 namespaceOfPrefix = null;
642
643 String nodeName = n.getNodeName();
644 int indexOfNSSep = nodeName.indexOf(':');
645 String prefix;
646
647 if (Node.ATTRIBUTE_NODE == ntype)
648 {
649 if (indexOfNSSep > 0)
650 {
651 prefix = nodeName.substring(0, indexOfNSSep);
652 }
653 else
654 {
655
656 // Attributes don't use the default namespace, so if
657 // there isn't a prefix, we're done.
658 return namespaceOfPrefix;
659 }
660 }
661 else
662 {
663 prefix = (indexOfNSSep >= 0)
664 ? nodeName.substring(0, indexOfNSSep) : "";
665 }
666
667 boolean ancestorsHaveXMLNS = false;
668 boolean nHasXMLNS = false;
669
670 if (prefix.equals("xml"))
671 {
672 namespaceOfPrefix = QName.S_XMLNAMESPACEURI;
673 }
674 else
675 {
676 int parentType;
677 Node parent = n;
678
679 while ((null != parent) && (null == namespaceOfPrefix))
680 {
681 if ((null != nsInfo)
682 && (nsInfo.m_ancestorHasXMLNSAttrs
683 == NSInfo.ANCESTORNOXMLNS))
684 {
685 break;
686 }
687
688 parentType = parent.getNodeType();
689
690 if ((null == nsInfo) || nsInfo.m_hasXMLNSAttrs)
691 {
692 boolean elementHasXMLNS = false;
693
694 if (parentType == Node.ELEMENT_NODE)
695 {
696 NamedNodeMap nnm = parent.getAttributes();
697
698 for (int i = 0; i < nnm.getLength(); i++)
699 {
700 Node attr = nnm.item(i);
701 String aname = attr.getNodeName();
702
703 if (aname.charAt(0) == 'x')
704 {
705 boolean isPrefix = aname.startsWith("xmlns:");
706
707 if (aname.equals("xmlns") || isPrefix)
708 {
709 if (n == parent)
710 nHasXMLNS = true;
711
712 elementHasXMLNS = true;
713 ancestorsHaveXMLNS = true;
714
715 String p = isPrefix ? aname.substring(6) : "";
716
717 if (p.equals(prefix))
718 {
719 namespaceOfPrefix = attr.getNodeValue();
720
721 break;
722 }
723 }
724 }
725 }
726 }
727
728 if ((Node.ATTRIBUTE_NODE != parentType) && (null == nsInfo)
729 && (n != parent))
730 {
731 nsInfo = elementHasXMLNS
732 ? m_NSInfoUnProcWithXMLNS : m_NSInfoUnProcWithoutXMLNS;
733
734 m_NSInfos.put(parent, nsInfo);
735 }
736 }
737
738 if (Node.ATTRIBUTE_NODE == parentType)
739 {
740 parent = getParentOfNode(parent);
741 }
742 else
743 {
744 m_candidateNoAncestorXMLNS.addElement(parent);
745 m_candidateNoAncestorXMLNS.addElement(nsInfo);
746
747 parent = parent.getParentNode();
748 }
749
750 if (null != parent)
751 {
752 Object nsObj = m_NSInfos.get(parent); // return value
753
754 nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
755 }
756 }
757
758 int nCandidates = m_candidateNoAncestorXMLNS.size();
759
760 if (nCandidates > 0)
761 {
762 if ((false == ancestorsHaveXMLNS) && (null == parent))
763 {
764 for (int i = 0; i < nCandidates; i += 2)
765 {
766 Object candidateInfo = m_candidateNoAncestorXMLNS.elementAt(i
767 + 1);
768
769 if (candidateInfo == m_NSInfoUnProcWithoutXMLNS)
770 {
771 m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
772 m_NSInfoUnProcNoAncestorXMLNS);
773 }
774 else if (candidateInfo == m_NSInfoNullWithoutXMLNS)
775 {
776 m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
777 m_NSInfoNullNoAncestorXMLNS);
778 }
779 }
780 }
781
782 m_candidateNoAncestorXMLNS.removeAllElements();
783 }
784 }
785
786 if (Node.ATTRIBUTE_NODE != ntype)
787 {
788 if (null == namespaceOfPrefix)
789 {
790 if (ancestorsHaveXMLNS)
791 {
792 if (nHasXMLNS)
793 m_NSInfos.put(n, m_NSInfoNullWithXMLNS);
794 else
795 m_NSInfos.put(n, m_NSInfoNullWithoutXMLNS);
796 }
797 else
798 {
799 m_NSInfos.put(n, m_NSInfoNullNoAncestorXMLNS);
800 }
801 }
802 else
803 {
804 m_NSInfos.put(n, new NSInfo(namespaceOfPrefix, nHasXMLNS));
805 }
806 }
807 }
808
809 return namespaceOfPrefix;
810 }
811
812 /**
813 * Returns the local name of the given node. If the node's name begins
814 * with a namespace prefix, this is the part after the colon; otherwise
815 * it's the full node name.
816 *
817 * @param n the node to be examined.
818 *
819 * @return String containing the Local Name
820 */
821 public String getLocalNameOfNode(Node n)
822 {
823
824 String qname = n.getNodeName();
825 int index = qname.indexOf(':');
826
827 return (index < 0) ? qname : qname.substring(index + 1);
828 }
829
830 /**
831 * Returns the element name with the namespace prefix (if any) replaced
832 * by the Namespace URI it was bound to. This is not a standard
833 * representation of a node name, but it allows convenient
834 * single-string comparison of the "universal" names of two nodes.
835 *
836 * @param elem Element to be examined.
837 *
838 * @return String in the form "namespaceURI:localname" if the node
839 * belongs to a namespace, or simply "localname" if it doesn't.
840 * @see #getExpandedAttributeName
841 */
842 public String getExpandedElementName(Element elem)
843 {
844
845 String namespace = getNamespaceOfNode(elem);
846
847 return (null != namespace)
848 ? namespace + ":" + getLocalNameOfNode(elem)
849 : getLocalNameOfNode(elem);
850 }
851
852 /**
853 * Returns the attribute name with the namespace prefix (if any) replaced
854 * by the Namespace URI it was bound to. This is not a standard
855 * representation of a node name, but it allows convenient
856 * single-string comparison of the "universal" names of two nodes.
857 *
858 * @param attr Attr to be examined
859 *
860 * @return String in the form "namespaceURI:localname" if the node
861 * belongs to a namespace, or simply "localname" if it doesn't.
862 * @see #getExpandedElementName
863 */
864 public String getExpandedAttributeName(Attr attr)
865 {
866
867 String namespace = getNamespaceOfNode(attr);
868
869 return (null != namespace)
870 ? namespace + ":" + getLocalNameOfNode(attr)
871 : getLocalNameOfNode(attr);
872 }
873
874 //==========================================================
875 // SECTION: DOM Helper Functions
876 //==========================================================
877
878 /**
879 * Tell if the node is ignorable whitespace. Note that this can
880 * be determined only in the context of a DTD or other Schema,
881 * and that DOM Level 2 has nostandardized DOM API which can
882 * return that information.
883 * @deprecated
884 *
885 * @param node Node to be examined
886 *
887 * @return CURRENTLY HARDCODED TO FALSE, but should return true if
888 * and only if the node is of type Text, contains only whitespace,
889 * and does not appear as part of the #PCDATA content of an element.
890 * (Note that determining this last may require allowing for
891 * Entity References.)
892 */
893 public boolean isIgnorableWhitespace(Text node)
894 {
895
896 boolean isIgnorable = false; // return value
897
898 // TODO: I can probably do something to figure out if this
899 // space is ignorable from just the information in
900 // the DOM tree.
901 // -- You need to be able to distinguish whitespace
902 // that is #PCDATA from whitespace that isn't. That requires
903 // DTD support, which won't be standardized until DOM Level 3.
904 return isIgnorable;
905 }
906
907 /**
908 * Get the first unparented node in the ancestor chain.
909 * @deprecated
910 *
911 * @param node Starting node, to specify which chain to chase
912 *
913 * @return the topmost ancestor.
914 */
915 public Node getRoot(Node node)
916 {
917
918 Node root = null;
919
920 while (node != null)
921 {
922 root = node;
923 node = getParentOfNode(node);
924 }
925
926 return root;
927 }
928
929 /**
930 * Get the root node of the document tree, regardless of
931 * whether or not the node passed in is a document node.
932 * <p>
933 * TODO: This doesn't handle DocumentFragments or "orphaned" subtrees
934 * -- it's currently returning ownerDocument even when the tree is
935 * not actually part of the main Document tree. We should either
936 * rewrite the description to say that it finds the Document node,
937 * or change the code to walk up the ancestor chain.
938
939 *
940 * @param n Node to be examined
941 *
942 * @return the Document node. Note that this is not the correct answer
943 * if n was (or was a child of) a DocumentFragment or an orphaned node,
944 * as can arise if the DOM has been edited rather than being generated
945 * by a parser.
946 */
947 public Node getRootNode(Node n)
948 {
949 int nt = n.getNodeType();
950 return ( (Node.DOCUMENT_NODE == nt) || (Node.DOCUMENT_FRAGMENT_NODE == nt) )
951 ? n : n.getOwnerDocument();
952 }
953
954 /**
955 * Test whether the given node is a namespace decl node. In DOM Level 2
956 * this can be done in a namespace-aware manner, but in Level 1 DOMs
957 * it has to be done by testing the node name.
958 *
959 * @param n Node to be examined.
960 *
961 * @return boolean -- true iff the node is an Attr whose name is
962 * "xmlns" or has the "xmlns:" prefix.
963 */
964 public boolean isNamespaceNode(Node n)
965 {
966
967 if (Node.ATTRIBUTE_NODE == n.getNodeType())
968 {
969 String attrName = n.getNodeName();
970
971 return (attrName.startsWith("xmlns:") || attrName.equals("xmlns"));
972 }
973
974 return false;
975 }
976
977 /**
978 * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs,
979 * parent for other nodes.
980 * <p>
981 * Background: The DOM believes that you must be your Parent's
982 * Child, and thus Attrs don't have parents. XPath said that Attrs
983 * do have their owning Element as their parent. This function
984 * bridges the difference, either by using the DOM Level 2 ownerElement
985 * function or by using a "silly and expensive function" in Level 1
986 * DOMs.
987 * <p>
988 * (There's some discussion of future DOMs generalizing ownerElement
989 * into ownerNode and making it work on all types of nodes. This
990 * still wouldn't help the users of Level 1 or Level 2 DOMs)
991 * <p>
992 *
993 * @param node Node whose XPath parent we want to obtain
994 *
995 * @return the parent of the node, or the ownerElement if it's an
996 * Attr node, or null if the node is an orphan.
997 *
998 * @throws RuntimeException if the Document has no root element.
999 * This can't arise if the Document was created
1000 * via the DOM Level 2 factory methods, but is possible if other
1001 * mechanisms were used to obtain it
1002 */
1003 public static Node getParentOfNode(Node node) throws RuntimeException
1004 {
1005 Node parent;
1006 short nodeType = node.getNodeType();
1007
1008 if (Node.ATTRIBUTE_NODE == nodeType)
1009 {
1010 Document doc = node.getOwnerDocument();
1011 /*
1012 TBD:
1013 if(null == doc)
1014 {
1015 throw new RuntimeException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT, null));//"Attribute child does not have an owner document!");
1016 }
1017 */
1018
1019 // Given how expensive the tree walk may be, we should first ask
1020 // whether this DOM can answer the question for us. The additional
1021 // test does slow down Level 1 DOMs slightly. DOMHelper2, which
1022 // is currently specialized for Xerces, assumes it can use the
1023 // Level 2 solution. We might want to have an intermediate stage,
1024 // which would assume DOM Level 2 but not assume Xerces.
1025 //
1026 // (Shouldn't have to check whether impl is null in a compliant DOM,
1027 // but let's be paranoid for a moment...)
1028 DOMImplementation impl=doc.getImplementation();
1029 if(impl!=null && impl.hasFeature("Core","2.0"))
1030 {
1031 parent=((Attr)node).getOwnerElement();
1032 return parent;
1033 }
1034
1035 // DOM Level 1 solution, as fallback. Hugely expensive.
1036
1037 Element rootElem = doc.getDocumentElement();
1038
1039 if (null == rootElem)
1040 {
1041 throw new RuntimeException(
1042 XMLMessages.createXMLMessage(
1043 XMLErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT,
1044 null)); //"Attribute child does not have an owner document element!");
1045 }
1046
1047 parent = locateAttrParent(rootElem, node);
1048
1049 }
1050 else
1051 {
1052 parent = node.getParentNode();
1053
1054 // if((Node.DOCUMENT_NODE != nodeType) && (null == parent))
1055 // {
1056 // throw new RuntimeException("Child does not have parent!");
1057 // }
1058 }
1059
1060 return parent;
1061 }
1062
1063 /**
1064 * Given an ID, return the element. This can work only if the document
1065 * is interpreted in the context of a DTD or Schema, since otherwise
1066 * we don't know which attributes are or aren't IDs.
1067 * <p>
1068 * Note that DOM Level 1 had no ability to retrieve this information.
1069 * DOM Level 2 introduced it but does not promise that it will be
1070 * supported in all DOMs; those which can't support it will always
1071 * return null.
1072 * <p>
1073 * TODO: getElementByID is currently unimplemented. Support DOM Level 2?
1074 *
1075 * @param id The unique identifier to be searched for.
1076 * @param doc The document to search within.
1077 * @return CURRENTLY HARDCODED TO NULL, but it should be:
1078 * The node which has this unique identifier, or null if there
1079 * is no such node or this DOM can't reliably recognize it.
1080 */
1081 public Element getElementByID(String id, Document doc)
1082 {
1083 return null;
1084 }
1085
1086 /**
1087 * The getUnparsedEntityURI function returns the URI of the unparsed
1088 * entity with the specified name in the same document as the context
1089 * node (see [3.3 Unparsed Entities]). It returns the empty string if
1090 * there is no such entity.
1091 * <p>
1092 * XML processors may choose to use the System Identifier (if one
1093 * is provided) to resolve the entity, rather than the URI in the
1094 * Public Identifier. The details are dependent on the processor, and
1095 * we would have to support some form of plug-in resolver to handle
1096 * this properly. Currently, we simply return the System Identifier if
1097 * present, and hope that it a usable URI or that our caller can
1098 * map it to one.
1099 * TODO: Resolve Public Identifiers... or consider changing function name.
1100 * <p>
1101 * If we find a relative URI
1102 * reference, XML expects it to be resolved in terms of the base URI
1103 * of the document. The DOM doesn't do that for us, and it isn't
1104 * entirely clear whether that should be done here; currently that's
1105 * pushed up to a higher levelof our application. (Note that DOM Level
1106 * 1 didn't store the document's base URI.)
1107 * TODO: Consider resolving Relative URIs.
1108 * <p>
1109 * (The DOM's statement that "An XML processor may choose to
1110 * completely expand entities before the structure model is passed
1111 * to the DOM" refers only to parsed entities, not unparsed, and hence
1112 * doesn't affect this function.)
1113 *
1114 * @param name A string containing the Entity Name of the unparsed
1115 * entity.
1116 * @param doc Document node for the document to be searched.
1117 *
1118 * @return String containing the URI of the Unparsed Entity, or an
1119 * empty string if no such entity exists.
1120 */
1121 public String getUnparsedEntityURI(String name, Document doc)
1122 {
1123
1124 String url = "";
1125 DocumentType doctype = doc.getDoctype();
1126
1127 if (null != doctype)
1128 {
1129 NamedNodeMap entities = doctype.getEntities();
1130 if(null == entities)
1131 return url;
1132 Entity entity = (Entity) entities.getNamedItem(name);
1133 if(null == entity)
1134 return url;
1135
1136 String notationName = entity.getNotationName();
1137
1138 if (null != notationName) // then it's unparsed
1139 {
1140 // The draft says: "The XSLT processor may use the public
1141 // identifier to generate a URI for the entity instead of the URI
1142 // specified in the system identifier. If the XSLT processor does
1143 // not use the public identifier to generate the URI, it must use
1144 // the system identifier; if the system identifier is a relative
1145 // URI, it must be resolved into an absolute URI using the URI of
1146 // the resource containing the entity declaration as the base
1147 // URI [RFC2396]."
1148 // So I'm falling a bit short here.
1149 url = entity.getSystemId();
1150
1151 if (null == url)
1152 {
1153 url = entity.getPublicId();
1154 }
1155 else
1156 {
1157 // This should be resolved to an absolute URL, but that's hard
1158 // to do from here.
1159 }
1160 }
1161 }
1162
1163 return url;
1164 }
1165
1166 /**
1167 * Support for getParentOfNode; walks a DOM tree until it finds
1168 * the Element which owns the Attr. This is hugely expensive, and
1169 * if at all possible you should use the DOM Level 2 Attr.ownerElement()
1170 * method instead.
1171 * <p>
1172 * The DOM Level 1 developers expected that folks would keep track
1173 * of the last Element they'd seen and could recover the info from
1174 * that source. Obviously that doesn't work very well if the only
1175 * information you've been presented with is the Attr. The DOM Level 2
1176 * getOwnerElement() method fixes that, but only for Level 2 and
1177 * later DOMs.
1178 *
1179 * @param elem Element whose subtree is to be searched for this Attr
1180 * @param attr Attr whose owner is to be located.
1181 *
1182 * @return the first Element whose attribute list includes the provided
1183 * attr. In modern DOMs, this will also be the only such Element. (Early
1184 * DOMs had some hope that Attrs might be sharable, but this idea has
1185 * been abandoned.)
1186 */
1187 private static Node locateAttrParent(Element elem, Node attr)
1188 {
1189
1190 Node parent = null;
1191
1192 // This should only be called for Level 1 DOMs, so we don't have to
1193 // worry about namespace issues. In later levels, it's possible
1194 // for a DOM to have two Attrs with the same NodeName but
1195 // different namespaces, and we'd need to get getAttributeNodeNS...
1196 // but later levels also have Attr.getOwnerElement.
1197 Attr check=elem.getAttributeNode(attr.getNodeName());
1198 if(check==attr)
1199 parent = elem;
1200
1201 if (null == parent)
1202 {
1203 for (Node node = elem.getFirstChild(); null != node;
1204 node = node.getNextSibling())
1205 {
1206 if (Node.ELEMENT_NODE == node.getNodeType())
1207 {
1208 parent = locateAttrParent((Element) node, attr);
1209
1210 if (null != parent)
1211 break;
1212 }
1213 }
1214 }
1215
1216 return parent;
1217 }
1218
1219 /**
1220 * The factory object used for creating nodes
1221 * in the result tree.
1222 */
1223 protected Document m_DOMFactory = null;
1224
1225 /**
1226 * Store the factory object required to create DOM nodes
1227 * in the result tree. In fact, that's just the result tree's
1228 * Document node...
1229 *
1230 * @param domFactory The DOM Document Node within whose context
1231 * the result tree will be built.
1232 */
1233 public void setDOMFactory(Document domFactory)
1234 {
1235 this.m_DOMFactory = domFactory;
1236 }
1237
1238 /**
1239 * Retrieve the factory object required to create DOM nodes
1240 * in the result tree.
1241 *
1242 * @return The result tree's DOM Document Node.
1243 */
1244 public Document getDOMFactory()
1245 {
1246
1247 if (null == this.m_DOMFactory)
1248 {
1249 this.m_DOMFactory = createDocument();
1250 }
1251
1252 return this.m_DOMFactory;
1253 }
1254
1255 /**
1256 * Get the textual contents of the node. See
1257 * getNodeData(Node,FastStringBuffer) for discussion of how
1258 * whitespace nodes are handled.
1259 *
1260 * @param node DOM Node to be examined
1261 * @return String containing a concatenation of all the
1262 * textual content within that node.
1263 * @see #getNodeData(Node,FastStringBuffer)
1264 *
1265 */
1266 public static String getNodeData(Node node)
1267 {
1268
1269 FastStringBuffer buf = StringBufferPool.get();
1270 String s;
1271
1272 try
1273 {
1274 getNodeData(node, buf);
1275
1276 s = (buf.length() > 0) ? buf.toString() : "";
1277 }
1278 finally
1279 {
1280 StringBufferPool.free(buf);
1281 }
1282
1283 return s;
1284 }
1285
1286 /**
1287 * Retrieve the text content of a DOM subtree, appending it into a
1288 * user-supplied FastStringBuffer object. Note that attributes are
1289 * not considered part of the content of an element.
1290 * <p>
1291 * There are open questions regarding whitespace stripping.
1292 * Currently we make no special effort in that regard, since the standard
1293 * DOM doesn't yet provide DTD-based information to distinguish
1294 * whitespace-in-element-context from genuine #PCDATA. Note that we
1295 * should probably also consider xml:space if/when we address this.
1296 * DOM Level 3 may solve the problem for us.
1297 *
1298 * @param node Node whose subtree is to be walked, gathering the
1299 * contents of all Text or CDATASection nodes.
1300 * @param buf FastStringBuffer into which the contents of the text
1301 * nodes are to be concatenated.
1302 */
1303 public static void getNodeData(Node node, FastStringBuffer buf)
1304 {
1305
1306 switch (node.getNodeType())
1307 {
1308 case Node.DOCUMENT_FRAGMENT_NODE :
1309 case Node.DOCUMENT_NODE :
1310 case Node.ELEMENT_NODE :
1311 {
1312 for (Node child = node.getFirstChild(); null != child;
1313 child = child.getNextSibling())
1314 {
1315 getNodeData(child, buf);
1316 }
1317 }
1318 break;
1319 case Node.TEXT_NODE :
1320 case Node.CDATA_SECTION_NODE :
1321 buf.append(node.getNodeValue());
1322 break;
1323 case Node.ATTRIBUTE_NODE :
1324 buf.append(node.getNodeValue());
1325 break;
1326 case Node.PROCESSING_INSTRUCTION_NODE :
1327 // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1328 break;
1329 default :
1330 // ignore
1331 break;
1332 }
1333 }
1334 }