001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: DTM.java 468653 2006-10-28 07:07:05Z minchau $
020     */
021    package org.apache.xml.dtm;
022    
023    import javax.xml.transform.SourceLocator;
024    
025    import org.apache.xml.utils.XMLString;
026    
027    /**
028     * <code>DTM</code> is an XML document model expressed as a table
029     * rather than an object tree. It attempts to provide an interface to
030     * a parse tree that has very little object creation. (DTM
031     * implementations may also support incremental construction of the
032     * model, but that's hidden from the DTM API.)
033     *
034     * <p>Nodes in the DTM are identified by integer "handles".  A handle must
035     * be unique within a process, and carries both node identification and
036     * document identification.  It must be possible to compare two handles
037     * (and thus their nodes) for identity with "==".</p>
038     *
039     * <p>Namespace URLs, local-names, and expanded-names can all be
040     * represented by and tested as integer ID values.  An expanded name
041     * represents (and may or may not directly contain) a combination of
042     * the URL ID, and the local-name ID.  Note that the namespace URL id
043     * can be 0, which should have the meaning that the namespace is null.
044     * For consistency, zero should not be used for a local-name index. </p>
045     *
046     * <p>Text content of a node is represented by an index and length,
047     * permitting efficient storage such as a shared FastStringBuffer.</p>
048     *
049     * <p>The model of the tree, as well as the general navigation model,
050     * is that of XPath 1.0, for the moment.  The model will eventually be
051     * adapted to match the XPath 2.0 data model, XML Schema, and
052     * InfoSet.</p>
053     *
054     * <p>DTM does _not_ directly support the W3C's Document Object
055     * Model. However, it attempts to come close enough that an
056     * implementation of DTM can be created that wraps a DOM and vice
057     * versa.</p>
058     *
059     * <p><strong>Please Note:</strong> The DTM API is still
060     * <strong>Subject To Change.</strong> This wouldn't affect most
061     * users, but might require updating some extensions.</p>
062     *
063     * <p> The largest change being contemplated is a reconsideration of
064     * the Node Handle representation.  We are still not entirely sure
065     * that an integer packed with two numeric subfields is really the
066     * best solution. It has been suggested that we move up to a Long, to
067     * permit more nodes per document without having to reduce the number
068     * of slots in the DTMManager. There's even been a proposal that we
069     * replace these integers with "cursor" objects containing the
070     * internal node id and a pointer to the actual DTM object; this might
071     * reduce the need to continuously consult the DTMManager to retrieve
072     * the latter, and might provide a useful "hook" back into normal Java
073     * heap management.  But changing this datatype would have huge impact
074     * on Xalan's internals -- especially given Java's lack of C-style
075     * typedefs -- so we won't cut over unless we're convinced the new
076     * solution really would be an improvement!</p>
077     * */
078    public interface DTM
079    {
080    
081      /**
082       * Null node handles are represented by this value (-1).
083       */
084      public static final int NULL = -1;
085    
086      // These nodeType mnemonics and values are deliberately the same as those
087      // used by the DOM, for convenient mapping.
088      //
089      // %REVIEW% Should we actually define these as initialized to,
090      // e.g. org.w3c.dom.Document.ELEMENT_NODE?
091    
092      /**
093       * The node is a <code>Root</code>.
094       */
095      public static final short ROOT_NODE = 0;
096      
097      /**
098       * The node is an <code>Element</code>.
099       */
100      public static final short ELEMENT_NODE = 1;
101    
102      /**
103       * The node is an <code>Attr</code>.
104       */
105      public static final short ATTRIBUTE_NODE = 2;
106    
107      /**
108       * The node is a <code>Text</code> node.
109       */
110      public static final short TEXT_NODE = 3;
111    
112      /**
113       * The node is a <code>CDATASection</code>.
114       */
115      public static final short CDATA_SECTION_NODE = 4;
116    
117      /**
118       * The node is an <code>EntityReference</code>.
119       */
120      public static final short ENTITY_REFERENCE_NODE = 5;
121    
122      /**
123       * The node is an <code>Entity</code>.
124       */
125      public static final short ENTITY_NODE = 6;
126    
127      /**
128       * The node is a <code>ProcessingInstruction</code>.
129       */
130      public static final short PROCESSING_INSTRUCTION_NODE = 7;
131    
132      /**
133       * The node is a <code>Comment</code>.
134       */
135      public static final short COMMENT_NODE = 8;
136    
137      /**
138       * The node is a <code>Document</code>.
139       */
140      public static final short DOCUMENT_NODE = 9;
141    
142      /**
143       * The node is a <code>DocumentType</code>.
144       */
145      public static final short DOCUMENT_TYPE_NODE = 10;
146    
147      /**
148       * The node is a <code>DocumentFragment</code>.
149       */
150      public static final short DOCUMENT_FRAGMENT_NODE = 11;
151    
152      /**
153       * The node is a <code>Notation</code>.
154       */
155      public static final short NOTATION_NODE = 12;
156    
157      /**
158       * The node is a <code>namespace node</code>. Note that this is not
159       * currently a node type defined by the DOM API.
160       */
161      public static final short NAMESPACE_NODE = 13;
162      
163      /**
164       * The number of valid node types: one more than NAMESPACE_NODE, the largest type value.
165       */
166      public static final short  NTYPES = 14;
167    
168      // ========= DTM Implementation Control Functions. ==============
169      // %TBD% RETIRED -- do via setFeature if needed. Remove from impls.
170      // public void setParseBlockSize(int blockSizeSuggestion);
171    
172      /**
173       * Set an implementation-dependent feature on this DTM.
174       * <p>
175       * %REVIEW% Do we really expect to set features on DTMs?
176       *
177       * @param featureId A feature URL identifying the feature to set.
178       * @param state true if this feature should be on, false otherwise.
179       */
180      public void setFeature(String featureId, boolean state);
181    
182      /**
183       * Set a run-time property for this DTM instance.
184       *
185       * @param property a <code>String</code> naming the property to set
186       * @param value an <code>Object</code> holding the value for that property
187       */
188      public void setProperty(String property, Object value);
189    
190      // ========= Document Navigation Functions =========
191    
192      /**
193       * This returns a stateless "traverser", that can navigate over an
194       * XPath axis, though not in document order.
195       *
196       * @param axis One of Axes.ANCESTORORSELF, etc.
197       *
198       * @return A DTMAxisTraverser, or null if the given axis isn't supported.
199       */
200      public DTMAxisTraverser getAxisTraverser(final int axis);
201    
202      /**
203       * This is a shortcut to the iterators that implement
204       * XPath axes.
205       * Returns a bare-bones iterator that must be initialized
206       * with a start node (using iterator.setStartNode()).
207       *
208       * @param axis One of Axes.ANCESTORORSELF, etc.
209       *
210       * @return A DTMAxisIterator, or null if the given axis isn't supported.
211       */
212      public DTMAxisIterator getAxisIterator(final int axis);
213    
214      /**
215       * Get an iterator that can navigate over an XPath Axis, predicated by
216       * the extended type ID.
217       *
218       * @param axis One of Axes.ANCESTORORSELF, etc.
219       * @param type An extended type ID.
220       *
221       * @return A DTMAxisIterator, or null if the given axis isn't supported.
222       */
223      public DTMAxisIterator getTypedAxisIterator(final int axis, final int type);
224    
225      /**
226       * Given a node handle, test if it has child nodes.
227       * <p> %REVIEW% This is obviously useful at the DOM layer, where it
228       * would permit testing this without having to create a proxy
229       * node. It's less useful in the DTM API, where
230       * (dtm.getFirstChild(nodeHandle)!=DTM.NULL) is just as fast and
231       * almost as self-evident. But it's a convenience, and eases porting
232       * of DOM code to DTM.  </p>
233       *
234       * @param nodeHandle Handle of the node.
235       * @return true if the given node has child nodes, false otherwise.
236       */
237      public boolean hasChildNodes(int nodeHandle);
238    
239      /**
240       * Given a node handle, get the handle of the node's first child.
241       *
242       * @param nodeHandle Handle of the node.
243       * @return Handle of the first child,
244       * or DTM.NULL to indicate none exists.
245       */
246      public int getFirstChild(int nodeHandle);
247    
248      /**
249       * Given a node handle, get the handle of the node's last child.
250       *
251       * @param nodeHandle Handle of the node.
252       * @return Handle of the last child,
253       * or DTM.NULL to indicate none exists.
254       */
255      public int getLastChild(int nodeHandle);
256    
257      /**
258       * Retrieves an attribute node by local name and namespace URI.
259       *
260       * %TBD% Note that we currently have no way to support
261       * the DOM's old getAttribute() call, which accesses only the qname.
262       *
263       * @param elementHandle Handle of the node upon which to look up this attribute.
264       * @param namespaceURI The namespace URI of the attribute to
265       *   retrieve, or null.
266       * @param name The local name of the attribute to
267       *   retrieve.
268       * @return The handle of the attribute node with the specified local name
269       *   and namespace URI, or <code>DTM.NULL</code> if there is no such
270       *   attribute.
271       */
272      public int getAttributeNode(int elementHandle, String namespaceURI,
273                                  String name);
274    
275      /**
276       * Given a node handle, get the handle of the node's first attribute.
277       *
278       * @param nodeHandle Handle of the node.
279       * @return Handle of first attribute, or DTM.NULL to indicate none exists.
280       */
281      public int getFirstAttribute(int nodeHandle);
282    
283      /**
284       * Given a node handle, get the handle of the node's first namespace node.
285       *
286       * @param nodeHandle handle to node, which should probably be an element
287       *                   node, but need not be.
288       *
289       * @param inScope true if all namespaces in scope should be
290       *                   returned, false if only the node's own
291       *                   namespace declarations should be returned.
292       * @return handle of first namespace,
293       * or DTM.NULL to indicate none exists.
294       */
295      public int getFirstNamespaceNode(int nodeHandle, boolean inScope);
296    
297      /**
298       * Given a node handle, advance to its next sibling.
299       * @param nodeHandle Handle of the node.
300       * @return Handle of the next sibling,
301       * or DTM.NULL to indicate none exists.
302       */
303      public int getNextSibling(int nodeHandle);
304    
305      /**
306       * Given a node handle, find its preceding sibling.
307       * WARNING: DTM implementations may be asymmetric; in some,
308       * this operation has been resolved by search, and is relatively expensive.
309       *
310       * @param nodeHandle the id of the node.
311       * @return Handle of the previous sibling,
312       * or DTM.NULL to indicate none exists.
313       */
314      public int getPreviousSibling(int nodeHandle);
315    
316      /**
317       * Given a node handle, advance to the next attribute. If the node is an
318       * element, we advance to its first attribute; if an attr, we advance to
319       * the next attr of the same element.
320       *
321       * @param nodeHandle Handle of the node.
322       * @return Handle of the resolved attr,
323       * or DTM.NULL to indicate none exists.
324       */
325      public int getNextAttribute(int nodeHandle);
326    
327      /**
328       * Given a namespace handle, advance to the next namespace in the same scope
329       * (local or local-plus-inherited, as selected by getFirstNamespaceNode).
330       *
331       * @param baseHandle handle to the original node that the first child
332       * was relative to (needed to return nodes in document order).
333       * @param namespaceHandle handle to node which must be of type
334       * NAMESPACE_NODE.
335       * @param inScope presumably the same scope flag that was passed to getFirstNamespaceNode -- TODO confirm.
336       * @return handle of next namespace,
337       * or DTM.NULL to indicate none exists.
338       */
339      public int getNextNamespaceNode(int baseHandle, int namespaceHandle,
340                                      boolean inScope);
341    
342      /**
343       * Given a node handle, find its parent node.
344       *
345       * @param nodeHandle the id of the node.
346       * @return Node handle of the parent,
347       * or DTM.NULL to indicate none exists.
348       */
349      public int getParent(int nodeHandle);
350    
351      /**
352       * Given a DTM which contains only a single document,
353       * find the Node Handle of the Document node. Note
354       * that if the DTM is configured so it can contain multiple
355       * documents, this call will return the Document currently
356       * under construction -- but may return DTM.NULL if it's between
357       * documents. Generally, you should use getOwnerDocument(nodeHandle)
358       * or getDocumentRoot(nodeHandle) instead.
359       *
360       * @return Node handle of document, or DTM.NULL if a shared DTM
361       * can not tell us which Document is currently active.
362       */
363      public int getDocument();
364    
365      /**
366       * Given a node handle, find the owning document node. This version mimics
367       * the behavior of the DOM call by the same name.
368       *
369       * @param nodeHandle the id of the node.
370       * @return Node handle of owning document, or DTM.NULL if the node was
371       * a Document.
372       * @see #getDocumentRoot(int nodeHandle)
373       */
374      public int getOwnerDocument(int nodeHandle);
375    
376      /**
377       * Given a node handle, find the owning document node.
378       *
379       * @param nodeHandle the id of the node.
380       * @return Node handle of owning document, or the node itself if it was
381       * a Document. (Note difference from DOM, where getOwnerDocument returns
382       * null for the Document node.)
383       * @see #getOwnerDocument(int nodeHandle)
384       */
385      public int getDocumentRoot(int nodeHandle);
386    
387      /**
388       * Get the string-value of a node as an XMLString object
389       * (see http://www.w3.org/TR/xpath#data-model
390       * for the definition of a node's string-value).
391       *
392       * @param nodeHandle The node ID.
393       *
394       * @return An XMLString object that represents the string-value of the given node.
395       */
396      public XMLString getStringValue(int nodeHandle);
397    
398      /**
399       * Get the number of character array chunks in
400       * the string-value of a node
401       * (see http://www.w3.org/TR/xpath#data-model
402       * for the definition of a node's string-value).
403       * Note that a single text node may have multiple text chunks.
404       *
405       * @param nodeHandle The node ID.
406       *
407       * @return the number of character array chunks in
408       *         the string-value of the node.
409       */
410      public int getStringValueChunkCount(int nodeHandle);
411    
412      /**
413       * Get a character array chunk in the string-value of a node
414       * (see http://www.w3.org/TR/xpath#data-model
415       * for the definition of a node's string-value).
416       * Note that a single text node may have multiple text chunks.
417       *
418       * @param nodeHandle The node ID.
419       * @param chunkIndex Which chunk to get.
420       * @param startAndLen  A two-integer array which, upon return, WILL
421       * BE FILLED with values representing the chunk's start position
422       * within the returned character buffer and the length of the chunk.
423       * @return The character array buffer within which the chunk occurs;
424       * startAndLen's contents are set as a side-effect.
425       */
426      public char[] getStringValueChunk(int nodeHandle, int chunkIndex,
427                                        int[] startAndLen);
428    
429      /**
430       * Given a node handle, return an ID that represents the node's expanded name.
431       *
432       * @param nodeHandle The handle of the node in question.
433       *
434       * @return the expanded-name ID of the node.
435       */
436      public int getExpandedTypeID(int nodeHandle);
437    
438      /**
439       * Given an expanded name, return an ID.  If the expanded-name does not
440       * exist in the internal tables, the entry will be created, and the ID will
441       * be returned.  Any additional nodes that are created that have this
442       * expanded name will use this ID.
443       *
444       * @param namespace presumably the namespace URI part of the expanded name -- TODO confirm.
445       * @param localName presumably the local-name part of the expanded name -- TODO confirm.
446       * @param type presumably a node type such as one of the *_NODE constants -- TODO confirm.
447       *
448       * @return the expanded-name ID for the given name.
449       */
450      public int getExpandedTypeID(String namespace, String localName, int type);
451    
452      /**
453       * Given an expanded-name ID, return the local name part.
454       *
455       * @param ExpandedNameID an ID that represents an expanded-name.
456       * @return The local name part of the expanded name.
457       */
458      public String getLocalNameFromExpandedNameID(int ExpandedNameID);
459    
460      /**
461       * Given an expanded-name ID, return the namespace URI part.
462       *
463       * @param ExpandedNameID an ID that represents an expanded-name.
464       * @return The namespace URI part of the expanded name, or null if no
465       * namespace was resolved.
466       */
467      public String getNamespaceFromExpandedNameID(int ExpandedNameID);
468    
469      /**
470       * Given a node handle, return its DOM-style node name. This will
471       * include names such as #text or #document.
472       *
473       * @param nodeHandle the id of the node.
474       * @return Name of this node, which may be an empty string.
475       * %REVIEW% Document when empty string is possible...
476       */
477      public String getNodeName(int nodeHandle);
478    
479      /**
480       * Given a node handle, return the XPath node name.  This should be
481       * the name as described by the XPath data model, NOT the DOM-style
482       * name.
483       *
484       * @param nodeHandle the id of the node.
485       * @return XPath-style name of this node.
486       */
487      public String getNodeNameX(int nodeHandle);
488    
489      /**
490       * Given a node handle, return its DOM-style localname.
491       * (As defined in Namespaces, this is the portion of the name after the
492       * prefix, if present, or the whole node name if no prefix exists.)
493       *
494       * @param nodeHandle the id of the node.
495       * @return Local name of this node.
496       */
497      public String getLocalName(int nodeHandle);
498    
499      /**
500       * Given a namespace handle, return the prefix that the namespace decl is
501       * mapping.
502       * Given any other node handle, return the prefix used to map to the namespace.
503       * (As defined in Namespaces, this is the portion of the name before any
504       * colon character).
505       *
506       * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
507       *
508       * @param nodeHandle the id of the node.
509       * @return prefix of this node's name, or "" if no explicit
510       * namespace prefix was given.
511       */
512      public String getPrefix(int nodeHandle);
513    
514      /**
515       * Given a node handle, return its DOM-style namespace URI.
516       * (As defined in Namespaces, this is the declared URI which this node's
517       * prefix -- or default in lieu thereof -- was mapped to.)
518       * @param nodeHandle the id of the node.
519       * @return URI value of this node's namespace, or null if no
520       * namespace was resolved.
521       */
522      public String getNamespaceURI(int nodeHandle);
523    
524      /**
525       * Given a node handle, return its node value. This is mostly
526       * as defined by the DOM, but may ignore some conveniences.
527       *
528       * @param nodeHandle The node id.
529       * @return Value of this node, or null if not
530       * meaningful for this node type.
531       */
532      public String getNodeValue(int nodeHandle);
533    
534      /**
535       * Given a node handle, return its DOM-style node type.
536       *
537       * <p>%REVIEW% Generally, returning short is false economy. Return int?</p>
538       *
539       * @param nodeHandle The node id.
540       * @return Node type (a short), as per the DOM's Node.*_NODE constants.
541       */
542      public short getNodeType(int nodeHandle);
543    
544      /**
545       * Get the depth level of this node in the tree (equals 1 for
546       * a parentless node).
547       *
548       * @param nodeHandle The node id.
549       * @return the number of ancestors of the node, plus one.
550       * @xsl.usage internal
551       */
552      public short getLevel(int nodeHandle);
553    
554      // ============== Document query functions ==============
555    
556      /**
557       * Tests whether the DTM DOM implementation implements a specific feature and
558       * that feature is supported by this node.
559       * @param feature The name of the feature to test.
560       * @param version This is the version number of the feature to test.
561       *   If the version is not
562       *   specified, supporting any version of the feature will cause the
563       *   method to return <code>true</code>.
564       * @return <code>true</code> if the specified feature is
565       *   supported on this node, <code>false</code> otherwise.
566       */
567      public boolean isSupported(String feature, String version);
568    
569      /**
570       * Return the base URI of the document entity. If it is not known
571       * (because the document was parsed from a socket connection or from
572       * standard input, for example), null is returned.
573       *
574       * @return the document base URI String object or null if unknown.
575       */
576      public String getDocumentBaseURI();
577    
578      /**
579       * Set the base URI of the document entity.
580       *
581       * @param baseURI the document base URI String object, or null if unknown.
582       */
583      public void setDocumentBaseURI(String baseURI);
584    
585      /**
586       * Return the system identifier of the document entity. If
587       * it is not known, null is returned.
588       *
589       * @param nodeHandle The node id, which can be any valid node handle.
590       * @return the system identifier String object, or null if unknown.
591       */
592      public String getDocumentSystemIdentifier(int nodeHandle);
593    
594      /**
595       * Return the name of the character encoding scheme
596       * in which the document entity is expressed.
597       *
598       * @param nodeHandle The node id, which can be any valid node handle.
599       * @return the document encoding String object.
600       */
601      public String getDocumentEncoding(int nodeHandle);
602    
603      /**
604       * Return an indication of the standalone status of the document,
605       * either "yes" or "no". This property is derived from the optional
606       * standalone document declaration in the XML declaration at the
607       * beginning of the document entity, and has no value if there is no
608       * standalone document declaration.
609       *
610       * @param nodeHandle The node id, which can be any valid node handle.
611       * @return the document standalone String object, either "yes", "no", or null.
612       */
613      public String getDocumentStandalone(int nodeHandle);
614    
615      /**
616       * Return a string representing the XML version of the document. This
617       * property is derived from the XML declaration optionally present at the
618       * beginning of the document entity, and has no value if there is no XML
619       * declaration.
620       *
621       * @param documentHandle the document handle.
622       * @return the document version String object.
623       */
624      public String getDocumentVersion(int documentHandle);
625    
626      /**
627       * Return an indication of
628       * whether the processor has read the complete DTD. Its value is a
629       * boolean. If it is false, then certain properties (as indicated in their
630       * own descriptions) may be unknown. If it is true, those properties
631       * are never unknown.
632       *
633       * @return <code>true</code> if all declarations were processed;
634       *         <code>false</code> otherwise.
635       */
636      public boolean getDocumentAllDeclarationsProcessed();
637    
638      /**
639       * Return the [system identifier] property of the document type declaration
640       * information item:
641       * the system identifier of the external subset, if
642       * it exists. Otherwise this property has no value.
643       *
644       * @return the system identifier String object, or null if there is none.
645       */
646      public String getDocumentTypeDeclarationSystemIdentifier();
647    
648      /**
649       * Return the public identifier of the external subset,
650       * normalized as described in 4.2.2 External Entities [XML]. If there is
651       * no external subset, or if it has no public identifier, this property
652       * has no value.
653       *
654       * @return the public identifier String object, or null if there is none.
655       */
656      public String getDocumentTypeDeclarationPublicIdentifier();
657    
658      /**
659       * Returns the <code>Element</code> whose <code>ID</code> is given by
660       * <code>elementId</code>. If no such element exists, returns
661       * <code>DTM.NULL</code>. Behavior is not defined if more than one element
662       * has this <code>ID</code>. Attributes (including those
663       * with the name "ID") are not of type ID unless so defined by DTD/Schema
664       * information available to the DTM implementation.
665       * Implementations that do not know whether attributes are of type ID or
666       * not are expected to return <code>DTM.NULL</code>.
667       *
668       * <p>%REVIEW% Presumably IDs are still scoped to a single document,
669       * and this operation searches only within a single document, right?
670       * Wouldn't want collisions between DTMs in the same process.</p>
671       *
672       * @param elementId The unique <code>id</code> value for an element.
673       * @return The handle of the matching element, or <code>DTM.NULL</code> if there is none.
674       */
675      public int getElementById(String elementId);
676    
677      /**
678       * The getUnparsedEntityURI function returns the URI of the unparsed
679       * entity with the specified name in the same document as the context
680       * node (see [3.3 Unparsed Entities]). It returns the empty string if
681       * there is no such entity.
682       * <p>
683       * XML processors may choose to use the System Identifier (if one
684       * is provided) to resolve the entity, rather than the URI in the
685       * Public Identifier. The details are dependent on the processor, and
686       * we would have to support some form of plug-in resolver to handle
687       * this properly. Currently, we simply return the System Identifier if
688       * present, and hope that it is a usable URI or that our caller can
689       * map it to one.
690       * %REVIEW% Resolve Public Identifiers... or consider changing function name.
691       * <p>
692       * If we find a relative URI
693       * reference, XML expects it to be resolved in terms of the base URI
694       * of the document. The DOM doesn't do that for us, and it isn't
695       * entirely clear whether that should be done here; currently that's
696       * pushed up to a higher level of our application. (Note that DOM Level
697       * 1 didn't store the document's base URI.)
698       * %REVIEW% Consider resolving Relative URIs.
699       * <p>
700       * (The DOM's statement that "An XML processor may choose to
701       * completely expand entities before the structure model is passed
702       * to the DOM" refers only to parsed entities, not unparsed, and hence
703       * doesn't affect this function.)
704       *
705       * @param name A string containing the Entity Name of the unparsed
706       * entity.
707       *
708       * @return String containing the URI of the Unparsed Entity, or an
709       * empty string if no such entity exists.
710       */
711      public String getUnparsedEntityURI(String name);
712    
713      // ============== Boolean methods ================
714    
715      /**
716       * Return true if the xsl:strip-space or xsl:preserve-space was processed
717       * during construction of the document contained in this DTM.
718       *
719       * @return true if xsl:strip-space or xsl:preserve-space was processed during construction.
720       */
721      public boolean supportsPreStripping();
722    
723      /**
724       * Figure out whether nodeHandle2 should be considered as being later
725       * in the document than nodeHandle1, in Document Order as defined
726       * by the XPath model. This may not agree with the ordering defined
727       * by other XML applications.
728       * <p>
729       * There are some cases where ordering isn't defined, and neither are
730       * the results of this function -- though we'll generally return true.
731       * <p>
732       * %REVIEW% Make sure this does the right thing with attribute nodes!!!
733       * <p>
734       * %REVIEW% Consider renaming for clarity. Perhaps isDocumentOrder(a,b)?
735       *
736       * @param firstNodeHandle Handle of the first node to perform position comparison on.
737       * @param secondNodeHandle Handle of the second node to perform position comparison on.
738       *
739       * @return false if secondNode comes before firstNode, otherwise return true.
740       * You can think of this as
741       * <code>(firstNode.documentOrderPosition &lt;= secondNode.documentOrderPosition)</code>.
742       */
743      public boolean isNodeAfter(int firstNodeHandle, int secondNodeHandle);
744    
745      /**
746       * 2. [element content whitespace] A boolean indicating whether a
747       * text node represents white space appearing within element content
748       * (see [XML], 2.10 "White Space Handling").  Note that validating
749       * XML processors are required by XML 1.0 to provide this
750       * information... but that DOM Level 2 did not support it, since it
751       * depends on knowledge of the DTD which DOM2 could not guarantee
752       * would be available.
753       * <p>
754       * If there is no declaration for the containing element, an XML
755       * processor must assume that the whitespace could be meaningful and
756       * return false. If no declaration has been read, but the [all
757       * declarations processed] property of the document information item
758       * is false (so there may be an unread declaration), then the value
759       * of this property is indeterminate for white space characters and
760       * should probably be reported as false. It is always false for text
761       * nodes that contain anything other than (or in addition to) white
762       * space.
763       * <p>
764       * Note too that this method always returns false for non-Text nodes.
765       * <p>
766       * %REVIEW% Joe wants to rename this isWhitespaceInElementContent() for clarity.
767       *
768       * @param nodeHandle The node ID.
769       * @return <code>true</code> if the node definitely represents whitespace in
770       * element content; <code>false</code> otherwise.
771       */
772      public boolean isCharacterElementContentWhitespace(int nodeHandle);
773    
774      /**
775       * 10. [all declarations processed] This property is not strictly speaking
776       * part of the infoset of the document. Rather it is an indication of
777       * whether the processor has read the complete DTD. Its value is a
778       * boolean. If it is false, then certain properties (indicated in their
779       * descriptions below) may be unknown. If it is true, those properties
780       * are never unknown.
781       *
782       * @param documentHandle A node handle that must identify a document.
783       * @return <code>true</code> if all declarations were processed;
784       * <code>false</code> otherwise.
785       */
786      public boolean isDocumentAllDeclarationsProcessed(int documentHandle);
787    
788      /**
789       * 5. [specified] A flag indicating whether this attribute was actually
790       * specified in the start-tag of its element, or was defaulted from the
791       * DTD (or schema).
792       *
793       * @param attributeHandle The attribute handle.
794       * @return <code>true</code> if the attribute was specified;
795       * <code>false</code> if it was defaulted or the handle doesn't
796       * refer to an attribute node.
797       */
798      public boolean isAttributeSpecified(int attributeHandle);
799    
800      // ========== Direct SAX Dispatch, for optimization purposes ========
801    
802      /**
803       * Directly call the
804       * characters method on the passed ContentHandler for the
805       * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
806       * for the definition of a node's string-value). Multiple calls to the
807       * ContentHandler's characters method may well occur for a single call to
808       * this method.
809       *
810       * @param nodeHandle The node ID.
811       * @param ch A non-null reference to a ContentHandler.
812       * @param normalize true if the content should be normalized according to
813       * the rules for the XPath
814       * <a href="http://www.w3.org/TR/xpath#function-normalize-space">normalize-space</a>
815       * function.
816       *
817       * @throws org.xml.sax.SAXException if a SAX error occurs during dispatch.
818       */
819      public void dispatchCharactersEvents(
820        int nodeHandle, org.xml.sax.ContentHandler ch, boolean normalize)
821          throws org.xml.sax.SAXException;
822    
823      /**
824       * Directly create SAX parser events representing the XML content of
825       * a DTM subtree. This is a "serialize" operation.
826       *
827       * @param nodeHandle The node ID.
828       * @param ch A non-null reference to a ContentHandler.
829       *
830       * @throws org.xml.sax.SAXException if a SAX error occurs during dispatch.
831       */
832      public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
833        throws org.xml.sax.SAXException;
834    
835      /**
836       * Return a DOM node for the given DTM node.
837       *
838       * @param nodeHandle The node ID.
839       *
840       * @return A DOM node representation of the DTM node.
841       */
842      public org.w3c.dom.Node getNode(int nodeHandle);
843    
844      // ==== Construction methods (may not be supported by some implementations!) =====
845      // %REVIEW% What response occurs if not supported?
846    
847      /**
848       * Guidance to the DTMManager: reports whether this model is built incrementally.
849       * @return true iff we're building this model incrementally (e.g. we're
850       * partnered with a CoroutineParser) and thus require that the
851       * transformation and the parse run simultaneously.
852       */
853      public boolean needsTwoThreads();
854    
855      // %REVIEW% Do these appends make any sense, should we support a
856      // wider set of methods (like the "append" methods in the
857      // current DTMDocumentImpl draft), or should we just support SAX
858      // listener interfaces?  Should it be a separate interface to
859      // make that distinction explicit?
860    
861      /**
862       * Return this DTM's content handler, if it has one.
863       *
864       * @return the ContentHandler, or null if this model doesn't respond to SAX events.
865       */
866      public org.xml.sax.ContentHandler getContentHandler();
867    
868      /**
869       * Return this DTM's lexical handler, if it has one.
870       *
871       * %REVIEW% Should this return null if construction is already done/begun?
872       *
873       * @return the LexicalHandler, or null if this model doesn't respond to lexical SAX events.
874       */
875      public org.xml.sax.ext.LexicalHandler getLexicalHandler();
876    
877      /**
878       * Return this DTM's EntityResolver, if it has one.
879       *
880       * @return the EntityResolver, or null if this model doesn't respond to SAX entity ref events.
881       */
882      public org.xml.sax.EntityResolver getEntityResolver();
883    
884      /**
885       * Return this DTM's DTDHandler, if it has one.
886       *
887       * @return the DTDHandler, or null if this model doesn't respond to SAX DTD events.
888       */
889      public org.xml.sax.DTDHandler getDTDHandler();
890    
891      /**
892       * Return this DTM's ErrorHandler, if it has one.
893       *
894       * @return the ErrorHandler, or null if this model doesn't respond to SAX error events.
895       */
896      public org.xml.sax.ErrorHandler getErrorHandler();
897    
898      /**
899       * Return this DTM's DeclHandler, if it has one.
900       *
901       * @return the DeclHandler, or null if this model doesn't respond to SAX Decl events.
902       */
903      public org.xml.sax.ext.DeclHandler getDeclHandler();
904    
905      /**
906       * Append a child to "the end of the document". Please note that
907       * the node is always cloned in a base DTM, since our basic behavior
908       * is immutable so nodes can't be removed from their previous
909       * location.
910       *
911       * <p> %REVIEW%  DTM maintains an insertion cursor which
912       * performs a depth-first tree walk as nodes come in, and this operation
913       * is really equivalent to:
914       *    <code>insertionCursor.appendChild(document.importNode(newChild))</code>
915       * where the insert point is the last element that was appended (or
916       * the last one popped back to by an end-element operation).</p>
917       *
918       * @param newChild Must be a valid new node handle.
919       * @param clone true if the child should be cloned into the document.
920       * @param cloneDepth if the clone argument is true, specifies that the
921       *                   clone should include all its children.
922       */
923      public void appendChild(int newChild, boolean clone, boolean cloneDepth);
924    
925      /**
926       * Append a text node child, constructed from the given string,
927       * to the end of the document. Behavior is otherwise like appendChild().
928       *
929       * @param str Non-null reference to the string supplying the text node's content.
930       */
931      public void appendTextChild(String str);
932    
933      /**
934       * Get the location of a node in the source document.
935       *
936       * @param node an <code>int</code> node handle
937       * @return a <code>SourceLocator</code> value, or null if no location
938       * is available
939       */
940      public SourceLocator getSourceLocatorFor(int node);
941    
942      /**
943       * This method is called as the DTM is registered with the
944       * DTMManager. It gives the DTM implementation a
945       * chance to initialize any subsystems that are required to
946       * build the DTM.
947       */
948      public void documentRegistration();
949    
950      /**
951       * As documents are released from the DTMManager, the DTM implementation
952       * will be notified of the event. This will allow the DTM implementation
953       * to shut down any subsystem activity that may have been associated with
954       * the active DTM implementation.
955       */
956    
957       public void documentRelease();
958    
959       /**
960        * Migrate a DTM built with an old DTMManager to a new DTMManager.
961        * After the migration, the new DTMManager will treat the DTM as
962        * one that it built itself.
963        * This is used to support DTM sharing between multiple transformations.
964        * @param manager the new DTMManager that this DTM is migrated to
965        */
966       public void migrateTo(DTMManager manager);
967    }