001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: DOMBuilder.java 1225373 2011-12-28 22:59:38Z mrglavas $
020     */
021    package org.apache.xml.utils;
022    
023    import java.util.Stack;
024    import java.util.Vector;
025    
026    import org.apache.xml.res.XMLErrorResources;
027    import org.apache.xml.res.XMLMessages;
028    
029    import org.w3c.dom.Document;
030    import org.w3c.dom.DocumentFragment;
031    import org.w3c.dom.Element;
032    import org.w3c.dom.Node;
033    import org.w3c.dom.Text;
034    import org.w3c.dom.CDATASection;
035    
036    import org.xml.sax.Attributes;
037    import org.xml.sax.ContentHandler;
038    import org.xml.sax.Locator;
039    import org.xml.sax.ext.LexicalHandler;
040    /**
041     * This class takes SAX events (in addition to some extra events
042     * that SAX doesn't handle yet) and adds the result to a document
043     * or document fragment.
044     * @xsl.usage general
045     */
046    public class DOMBuilder
047            implements ContentHandler, LexicalHandler
048    {
049    
050      /** Root document          */
051      public Document m_doc;
052    
053      /** Current node           */
054      protected Node m_currentNode = null;
055      
056      /** The root node          */
057      protected Node m_root = null;
058      
059      /** The next sibling node  */
060      protected Node m_nextSibling = null;
061    
062      /** First node of document fragment or null if not a DocumentFragment     */
063      public DocumentFragment m_docFrag = null;
064    
065      /** Vector of element nodes          */
066      protected Stack m_elemStack = new Stack();
067      
068      /** Namespace support */
069      protected Vector m_prefixMappings = new Vector();
070      
071      /**
072       * DOMBuilder instance constructor... it will add the DOM nodes
073       * to the document fragment.
074       *
075       * @param doc Root document
076       * @param node Current node
077       */
078      public DOMBuilder(Document doc, Node node)
079      {
080        m_doc = doc;
081        m_currentNode = m_root = node;
082        
083        if (node instanceof Element)
084          m_elemStack.push(node);
085      }
086    
087      /**
088       * DOMBuilder instance constructor... it will add the DOM nodes
089       * to the document fragment.
090       *
091       * @param doc Root document
092       * @param docFrag Document fragment
093       */
094      public DOMBuilder(Document doc, DocumentFragment docFrag)
095      {
096        m_doc = doc;
097        m_docFrag = docFrag;
098      }
099    
100      /**
101       * DOMBuilder instance constructor... it will add the DOM nodes
102       * to the document.
103       *
104       * @param doc Root document
105       */
106      public DOMBuilder(Document doc)
107      {
108        m_doc = doc;
109      }
110    
111      /**
112       * Get the root document or DocumentFragment of the DOM being created.
113       *
114       * @return The root document or document fragment if not null
115       */
116      public Node getRootDocument()
117      {
118        return (null != m_docFrag) ? (Node) m_docFrag : (Node) m_doc;
119      }
120      
121      /**
122       * Get the root node of the DOM tree.
123       */
124      public Node getRootNode()
125      {
126        return m_root;
127      }
128      
129      /**
130       * Get the node currently being processed.
131       *
132       * @return the current node being processed
133       */
134      public Node getCurrentNode()
135      {
136        return m_currentNode;
137      }
138      
139      /**
140       * Set the next sibling node, which is where the result nodes 
141       * should be inserted before.
142       * 
143       * @param nextSibling the next sibling node.
144       */
145      public void setNextSibling(Node nextSibling)
146      {
147        m_nextSibling = nextSibling;
148      }
149      
150      /**
151       * Return the next sibling node.
152       * 
153       * @return the next sibling node.
154       */
155      public Node getNextSibling()
156      {
157        return m_nextSibling;
158      }
159    
160      /**
161       * Return null since there is no Writer for this class.
162       *
163       * @return null
164       */
165      public java.io.Writer getWriter()
166      {
167        return null;
168      }
169    
170      /**
171       * Append a node to the current container.
172       *
173       * @param newNode New node to append
174       */
175      protected void append(Node newNode) throws org.xml.sax.SAXException
176      {
177    
178        Node currentNode = m_currentNode;
179    
180        if (null != currentNode)
181        {
182          if (currentNode == m_root && m_nextSibling != null)
183            currentNode.insertBefore(newNode, m_nextSibling);
184          else
185            currentNode.appendChild(newNode);
186    
187          // System.out.println(newNode.getNodeName());
188        }
189        else if (null != m_docFrag)
190        {
191          if (m_nextSibling != null)
192            m_docFrag.insertBefore(newNode, m_nextSibling);
193          else
194            m_docFrag.appendChild(newNode);
195        }
196        else
197        {
198          boolean ok = true;
199          short type = newNode.getNodeType();
200    
201          if (type == Node.TEXT_NODE)
202          {
203            String data = newNode.getNodeValue();
204    
205            if ((null != data) && (data.trim().length() > 0))
206            {
207              throw new org.xml.sax.SAXException(
208                XMLMessages.createXMLMessage(
209                  XMLErrorResources.ER_CANT_OUTPUT_TEXT_BEFORE_DOC, null));  //"Warning: can't output text before document element!  Ignoring...");
210            }
211    
212            ok = false;
213          }
214          else if (type == Node.ELEMENT_NODE)
215          {
216            if (m_doc.getDocumentElement() != null)
217            {
218              ok = false;
219              
220              throw new org.xml.sax.SAXException(
221                XMLMessages.createXMLMessage(
222                  XMLErrorResources.ER_CANT_HAVE_MORE_THAN_ONE_ROOT, null));  //"Can't have more than one root on a DOM!");
223            }
224          }
225    
226          if (ok)
227          {
228            if (m_nextSibling != null)
229              m_doc.insertBefore(newNode, m_nextSibling);
230            else
231              m_doc.appendChild(newNode);
232          }
233        }
234      }
235    
236      /**
237       * Receive an object for locating the origin of SAX document events.
238       *
239       * <p>SAX parsers are strongly encouraged (though not absolutely
240       * required) to supply a locator: if it does so, it must supply
241       * the locator to the application by invoking this method before
242       * invoking any of the other methods in the ContentHandler
243       * interface.</p>
244       *
245       * <p>The locator allows the application to determine the end
246       * position of any document-related event, even if the parser is
247       * not reporting an error.  Typically, the application will
248       * use this information for reporting its own errors (such as
249       * character content that does not match an application's
250       * business rules).  The information returned by the locator
251       * is probably not sufficient for use with a search engine.</p>
252       *
253       * <p>Note that the locator will return correct information only
254       * during the invocation of the events in this interface.  The
255       * application should not attempt to use it at any other time.</p>
256       *
257       * @param locator An object that can return the location of
258       *                any SAX document event.
259       * @see org.xml.sax.Locator
260       */
261      public void setDocumentLocator(Locator locator)
262      {
263    
264        // No action for the moment.
265      }
266    
267      /**
268       * Receive notification of the beginning of a document.
269       *
270       * <p>The SAX parser will invoke this method only once, before any
271       * other methods in this interface or in DTDHandler (except for
272       * setDocumentLocator).</p>
273       */
274      public void startDocument() throws org.xml.sax.SAXException
275      {
276    
277        // No action for the moment.
278      }
279    
280      /**
281       * Receive notification of the end of a document.
282       *
283       * <p>The SAX parser will invoke this method only once, and it will
284       * be the last method invoked during the parse.  The parser shall
285       * not invoke this method until it has either abandoned parsing
286       * (because of an unrecoverable error) or reached the end of
287       * input.</p>
288       */
289      public void endDocument() throws org.xml.sax.SAXException
290      {
291    
292        // No action for the moment.
293      }
294    
295      /**
296       * Receive notification of the beginning of an element.
297       *
298       * <p>The Parser will invoke this method at the beginning of every
299       * element in the XML document; there will be a corresponding
300       * endElement() event for every startElement() event (even when the
301       * element is empty). All of the element's content will be
302       * reported, in order, before the corresponding endElement()
303       * event.</p>
304       *
305       * <p>If the element name has a namespace prefix, the prefix will
306       * still be attached.  Note that the attribute list provided will
307       * contain only attributes with explicit values (specified or
308       * defaulted): #IMPLIED attributes will be omitted.</p>
309       *
310       *
311       * @param ns The namespace of the node
312       * @param localName The local part of the qualified name
313       * @param name The element name.
314       * @param atts The attributes attached to the element, if any.
315       * @see #endElement
316       * @see org.xml.sax.Attributes
317       */
318      public void startElement(
319              String ns, String localName, String name, Attributes atts)
320                throws org.xml.sax.SAXException
321      {
322    
323        Element elem;
324    
325            // Note that the namespace-aware call must be used to correctly
326            // construct a Level 2 DOM, even for non-namespaced nodes.
327        if ((null == ns) || (ns.length() == 0))
328          elem = m_doc.createElementNS(null,name);
329        else
330          elem = m_doc.createElementNS(ns, name);
331    
332        append(elem);
333    
334        try
335        {
336          int nAtts = atts.getLength();
337    
338          if (0 != nAtts)
339          {
340            for (int i = 0; i < nAtts; i++)
341            {
342    
343              //System.out.println("type " + atts.getType(i) + " name " + atts.getLocalName(i) );
344              // First handle a possible ID attribute
345              if (atts.getType(i).equalsIgnoreCase("ID"))
346                setIDAttribute(atts.getValue(i), elem);
347    
348              String attrNS = atts.getURI(i);
349    
350              if("".equals(attrNS))
351                attrNS = null; // DOM represents no-namespace as null
352    
353              // System.out.println("attrNS: "+attrNS+", localName: "+atts.getQName(i)
354              //                   +", qname: "+atts.getQName(i)+", value: "+atts.getValue(i));
355              // Crimson won't let us set an xmlns: attribute on the DOM.
356              String attrQName = atts.getQName(i);
357    
358              // In SAX, xmlns[:] attributes have an empty namespace, while in DOM they 
359              // should have the xmlns namespace
360              if (attrQName.startsWith("xmlns:") || attrQName.equals("xmlns")) {
361                attrNS = "http://www.w3.org/2000/xmlns/";
362              }
363    
364              // ALWAYS use the DOM Level 2 call!
365              elem.setAttributeNS(attrNS,attrQName, atts.getValue(i));
366            }
367          }
368          
369          /*
370           * Adding namespace nodes to the DOM tree;
371           */
372          int nDecls = m_prefixMappings.size();
373          
374          String prefix, declURL;
375          
376          for (int i = 0; i < nDecls; i += 2)
377          {
378            prefix = (String) m_prefixMappings.elementAt(i);
379    
380            if (prefix == null)
381              continue;
382    
383            declURL = (String) m_prefixMappings.elementAt(i + 1);
384    
385            elem.setAttributeNS("http://www.w3.org/2000/xmlns/", prefix, declURL);
386          }
387          
388          m_prefixMappings.clear();
389        
390          // append(elem);
391    
392          m_elemStack.push(elem);
393    
394          m_currentNode = elem;
395    
396          // append(elem);
397        }
398        catch(java.lang.Exception de)
399        {
400          // de.printStackTrace();
401          throw new org.xml.sax.SAXException(de);
402        }
403    
404      }
405    
406      /**
407    
408    
409    
410       * Receive notification of the end of an element.
411       *
412       * <p>The SAX parser will invoke this method at the end of every
413       * element in the XML document; there will be a corresponding
414       * startElement() event for every endElement() event (even when the
415       * element is empty).</p>
416       *
417       * <p>If the element name has a namespace prefix, the prefix will
418       * still be attached to the name.</p>
419       *
420       *
421       * @param ns the namespace of the element
422       * @param localName The local part of the qualified name of the element
423       * @param name The element name
424       */
425      public void endElement(String ns, String localName, String name)
426              throws org.xml.sax.SAXException
427      {
428        m_elemStack.pop();
429        m_currentNode = m_elemStack.isEmpty() ? null : (Node)m_elemStack.peek();
430      }
431    
432      /**
433       * Set an ID string to node association in the ID table.
434       *
435       * @param id The ID string.
436       * @param elem The associated ID.
437       */
438      public void setIDAttribute(String id, Element elem)
439      {
440    
441        // Do nothing. This method is meant to be overiden.
442      }
443    
444      /**
445       * Receive notification of character data.
446       *
447       * <p>The Parser will call this method to report each chunk of
448       * character data.  SAX parsers may return all contiguous character
449       * data in a single chunk, or they may split it into several
450       * chunks; however, all of the characters in any single event
451       * must come from the same external entity, so that the Locator
452       * provides useful information.</p>
453       *
454       * <p>The application must not attempt to read from the array
455       * outside of the specified range.</p>
456       *
457       * <p>Note that some parsers will report whitespace using the
458       * ignorableWhitespace() method rather than this one (validating
459       * parsers must do so).</p>
460       *
461       * @param ch The characters from the XML document.
462       * @param start The start position in the array.
463       * @param length The number of characters to read from the array.
464       * @see #ignorableWhitespace
465       * @see org.xml.sax.Locator
466       */
467      public void characters(char ch[], int start, int length) throws org.xml.sax.SAXException
468      {
469        if(isOutsideDocElem()
470           && org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length))
471          return;  // avoid DOM006 Hierarchy request error
472    
473        if (m_inCData)
474        {
475          cdata(ch, start, length);
476    
477          return;
478        }
479    
480        String s = new String(ch, start, length);
481        Node childNode;
482        childNode =  m_currentNode != null ? m_currentNode.getLastChild(): null;
483        if( childNode != null && childNode.getNodeType() == Node.TEXT_NODE ){
484           ((Text)childNode).appendData(s);
485        }
486        else{
487           Text text = m_doc.createTextNode(s);
488           append(text);
489        }
490      }
491    
492      /**
493       * If available, when the disable-output-escaping attribute is used,
494       * output raw text without escaping.  A PI will be inserted in front
495       * of the node with the name "lotusxsl-next-is-raw" and a value of
496       * "formatter-to-dom".
497       *
498       * @param ch Array containing the characters
499       * @param start Index to start of characters in the array
500       * @param length Number of characters in the array
501       */
502      public void charactersRaw(char ch[], int start, int length)
503              throws org.xml.sax.SAXException
504      {
505        if(isOutsideDocElem()
506           && org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length))
507          return;  // avoid DOM006 Hierarchy request error
508    
509    
510        String s = new String(ch, start, length);
511    
512        append(m_doc.createProcessingInstruction("xslt-next-is-raw",
513                                                 "formatter-to-dom"));
514        append(m_doc.createTextNode(s));
515      }
516    
517      /**
518       * Report the beginning of an entity.
519       *
520       * The start and end of the document entity are not reported.
521       * The start and end of the external DTD subset are reported
522       * using the pseudo-name "[dtd]".  All other events must be
523       * properly nested within start/end entity events.
524       *
525       * @param name The name of the entity.  If it is a parameter
526       *        entity, the name will begin with '%'.
527       * @see #endEntity
528       * @see org.xml.sax.ext.DeclHandler#internalEntityDecl
529       * @see org.xml.sax.ext.DeclHandler#externalEntityDecl
530       */
531      public void startEntity(String name) throws org.xml.sax.SAXException
532      {
533    
534        // Almost certainly the wrong behavior...
535        // entityReference(name);
536      }
537    
538      /**
539       * Report the end of an entity.
540       *
541       * @param name The name of the entity that is ending.
542       * @see #startEntity
543       */
544      public void endEntity(String name) throws org.xml.sax.SAXException{}
545    
546      /**
547       * Receive notivication of a entityReference.
548       *
549       * @param name name of the entity reference
550       */
551      public void entityReference(String name) throws org.xml.sax.SAXException
552      {
553        append(m_doc.createEntityReference(name));
554      }
555    
556      /**
557       * Receive notification of ignorable whitespace in element content.
558       *
559       * <p>Validating Parsers must use this method to report each chunk
560       * of ignorable whitespace (see the W3C XML 1.0 recommendation,
561       * section 2.10): non-validating parsers may also use this method
562       * if they are capable of parsing and using content models.</p>
563       *
564       * <p>SAX parsers may return all contiguous whitespace in a single
565       * chunk, or they may split it into several chunks; however, all of
566       * the characters in any single event must come from the same
567       * external entity, so that the Locator provides useful
568       * information.</p>
569       *
570       * <p>The application must not attempt to read from the array
571       * outside of the specified range.</p>
572       *
573       * @param ch The characters from the XML document.
574       * @param start The start position in the array.
575       * @param length The number of characters to read from the array.
576       * @see #characters
577       */
578      public void ignorableWhitespace(char ch[], int start, int length)
579              throws org.xml.sax.SAXException
580      {
581        if(isOutsideDocElem())
582          return;  // avoid DOM006 Hierarchy request error
583    
584        String s = new String(ch, start, length);
585    
586        append(m_doc.createTextNode(s));
587      }
588    
589      /**
590       * Tell if the current node is outside the document element.
591       *
592       * @return true if the current node is outside the document element.
593       */
594       private boolean isOutsideDocElem()
595       {
596          return (null == m_docFrag) && m_elemStack.size() == 0 && (null == m_currentNode || m_currentNode.getNodeType() == Node.DOCUMENT_NODE);
597       }
598    
599      /**
600       * Receive notification of a processing instruction.
601       *
602       * <p>The Parser will invoke this method once for each processing
603       * instruction found: note that processing instructions may occur
604       * before or after the main document element.</p>
605       *
606       * <p>A SAX parser should never report an XML declaration (XML 1.0,
607       * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
608       * using this method.</p>
609       *
610       * @param target The processing instruction target.
611       * @param data The processing instruction data, or null if
612       *        none was supplied.
613       */
614      public void processingInstruction(String target, String data)
615              throws org.xml.sax.SAXException
616      {
617        append(m_doc.createProcessingInstruction(target, data));
618      }
619    
620      /**
621       * Report an XML comment anywhere in the document.
622       *
623       * This callback will be used for comments inside or outside the
624       * document element, including comments in the external DTD
625       * subset (if read).
626       *
627       * @param ch An array holding the characters in the comment.
628       * @param start The starting position in the array.
629       * @param length The number of characters to use from the array.
630       */
631      public void comment(char ch[], int start, int length) throws org.xml.sax.SAXException
632      {
633        append(m_doc.createComment(new String(ch, start, length)));
634      }
635    
636      /** Flag indicating that we are processing a CData section          */
637      protected boolean m_inCData = false;
638    
639      /**
640       * Report the start of a CDATA section.
641       *
642       * @see #endCDATA
643       */
644      public void startCDATA() throws org.xml.sax.SAXException
645      {
646        m_inCData = true;
647        append(m_doc.createCDATASection(""));
648      }
649    
650      /**
651       * Report the end of a CDATA section.
652       *
653       * @see #startCDATA
654       */
655      public void endCDATA() throws org.xml.sax.SAXException
656      {
657        m_inCData = false;
658      }
659    
660      /**
661       * Receive notification of cdata.
662       *
663       * <p>The Parser will call this method to report each chunk of
664       * character data.  SAX parsers may return all contiguous character
665       * data in a single chunk, or they may split it into several
666       * chunks; however, all of the characters in any single event
667       * must come from the same external entity, so that the Locator
668       * provides useful information.</p>
669       *
670       * <p>The application must not attempt to read from the array
671       * outside of the specified range.</p>
672       *
673       * <p>Note that some parsers will report whitespace using the
674       * ignorableWhitespace() method rather than this one (validating
675       * parsers must do so).</p>
676       *
677       * @param ch The characters from the XML document.
678       * @param start The start position in the array.
679       * @param length The number of characters to read from the array.
680       * @see #ignorableWhitespace
681       * @see org.xml.sax.Locator
682       */
683      public void cdata(char ch[], int start, int length) throws org.xml.sax.SAXException
684      {
685        if(isOutsideDocElem()
686           && org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length))
687          return;  // avoid DOM006 Hierarchy request error
688    
689        String s = new String(ch, start, length);
690    
691        CDATASection section  =(CDATASection) m_currentNode.getLastChild();
692        section.appendData(s);
693      }
694    
695      /**
696       * Report the start of DTD declarations, if any.
697       *
698       * Any declarations are assumed to be in the internal subset
699       * unless otherwise indicated.
700       *
701       * @param name The document type name.
702       * @param publicId The declared public identifier for the
703       *        external DTD subset, or null if none was declared.
704       * @param systemId The declared system identifier for the
705       *        external DTD subset, or null if none was declared.
706       * @see #endDTD
707       * @see #startEntity
708       */
709      public void startDTD(String name, String publicId, String systemId)
710              throws org.xml.sax.SAXException
711      {
712    
713        // Do nothing for now.
714      }
715    
716      /**
717       * Report the end of DTD declarations.
718       *
719       * @see #startDTD
720       */
721      public void endDTD() throws org.xml.sax.SAXException
722      {
723    
724        // Do nothing for now.
725      }
726    
727      /**
728       * Begin the scope of a prefix-URI Namespace mapping.
729       *
730       * <p>The information from this event is not necessary for
731       * normal Namespace processing: the SAX XML reader will
732       * automatically replace prefixes for element and attribute
733       * names when the http://xml.org/sax/features/namespaces
734       * feature is true (the default).</p>
735       *
736       * <p>There are cases, however, when applications need to
737       * use prefixes in character data or in attribute values,
738       * where they cannot safely be expanded automatically; the
739       * start/endPrefixMapping event supplies the information
740       * to the application to expand prefixes in those contexts
741       * itself, if necessary.</p>
742       *
743       * <p>Note that start/endPrefixMapping events are not
744       * guaranteed to be properly nested relative to each-other:
745       * all startPrefixMapping events will occur before the
746       * corresponding startElement event, and all endPrefixMapping
747       * events will occur after the corresponding endElement event,
748       * but their order is not guaranteed.</p>
749       *
750       * @param prefix The Namespace prefix being declared.
751       * @param uri The Namespace URI the prefix is mapped to.
752       * @see #endPrefixMapping
753       * @see #startElement
754       */
755      public void startPrefixMapping(String prefix, String uri)
756              throws org.xml.sax.SAXException
757      {
758                  if(null == prefix || prefix.length() == 0)
759                    prefix = "xmlns";
760                  else prefix = "xmlns:"+prefix;
761                  m_prefixMappings.addElement(prefix);
762                  m_prefixMappings.addElement(uri); 
763      }
764    
765      /**
766       * End the scope of a prefix-URI mapping.
767       *
768       * <p>See startPrefixMapping for details.  This event will
769       * always occur after the corresponding endElement event,
770       * but the order of endPrefixMapping events is not otherwise
771       * guaranteed.</p>
772       *
773       * @param prefix The prefix that was being mapping.
774       * @see #startPrefixMapping
775       * @see #endElement
776       */
777      public void endPrefixMapping(String prefix) throws org.xml.sax.SAXException{}
778    
779      /**
780       * Receive notification of a skipped entity.
781       *
782       * <p>The Parser will invoke this method once for each entity
783       * skipped.  Non-validating processors may skip entities if they
784       * have not seen the declarations (because, for example, the
785       * entity was declared in an external DTD subset).  All processors
786       * may skip external entities, depending on the values of the
787       * http://xml.org/sax/features/external-general-entities and the
788       * http://xml.org/sax/features/external-parameter-entities
789       * properties.</p>
790       *
791       * @param name The name of the skipped entity.  If it is a
792       *        parameter entity, the name will begin with '%'.
793       */
794      public void skippedEntity(String name) throws org.xml.sax.SAXException{}
795    }