001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $
020     */
021    package org.apache.xml.serializer;
022    
023    import java.io.IOException;
024    
025    import org.apache.xml.serializer.utils.MsgKey;
026    import org.apache.xml.serializer.utils.Utils;
027    import org.xml.sax.Attributes;
028    import org.xml.sax.SAXException;
029    
030    /**
031     * This class is not a public API.
032     * It is only public because it is used in other packages. 
033     * This class converts SAX or SAX-like calls to a 
034     * serialized document for xsl:output method of "text".
035     * @xsl.usage internal
036     */
037    public class ToTextStream extends ToStream 
038    { 
039    
040           
041      /**
042       * Default constructor.
043       */
044      public ToTextStream()
045      {
046        super();
047      }
048    
049     
050     
051      /**
052       * Receive notification of the beginning of a document.
053       *
054       * <p>The SAX parser will invoke this method only once, before any
055       * other methods in this interface or in DTDHandler (except for
056       * setDocumentLocator).</p>
057       *
058       * @throws org.xml.sax.SAXException Any SAX exception, possibly
059       *            wrapping another exception.
060       *
061       * @throws org.xml.sax.SAXException
062       */
063      protected void startDocumentInternal() throws org.xml.sax.SAXException
064      {
065        super.startDocumentInternal();
066    
067        m_needToCallStartDocument = false;
068    
069        // No action for the moment.
070      }
071    
072      /**
073       * Receive notification of the end of a document.
074       *
075       * <p>The SAX parser will invoke this method only once, and it will
076       * be the last method invoked during the parse.  The parser shall
077       * not invoke this method until it has either abandoned parsing
078       * (because of an unrecoverable error) or reached the end of
079       * input.</p>
080       *
081       * @throws org.xml.sax.SAXException Any SAX exception, possibly
082       *            wrapping another exception.
083       *
084       * @throws org.xml.sax.SAXException
085       */
086      public void endDocument() throws org.xml.sax.SAXException
087      {
088        flushPending();
089        flushWriter();
090        if (m_tracer != null)
091            super.fireEndDoc();
092      }
093    
094      /**
095       * Receive notification of the beginning of an element.
096       *
097       * <p>The Parser will invoke this method at the beginning of every
098       * element in the XML document; there will be a corresponding
099       * endElement() event for every startElement() event (even when the
100       * element is empty). All of the element's content will be
101       * reported, in order, before the corresponding endElement()
102       * event.</p>
103       *
104       * <p>If the element name has a namespace prefix, the prefix will
105       * still be attached.  Note that the attribute list provided will
106       * contain only attributes with explicit values (specified or
107       * defaulted): #IMPLIED attributes will be omitted.</p>
108       *
109       *
110       * @param namespaceURI The Namespace URI, or the empty string if the
111       *        element has no Namespace URI or if Namespace
112       *        processing is not being performed.
113       * @param localName The local name (without prefix), or the
114       *        empty string if Namespace processing is not being
115       *        performed.
116       * @param name The qualified name (with prefix), or the
117       *        empty string if qualified names are not available.
118       * @param atts The attributes attached to the element, if any.
119       * @throws org.xml.sax.SAXException Any SAX exception, possibly
120       *            wrapping another exception.
121       * @see #endElement
122       * @see org.xml.sax.AttributeList
123       *
124       * @throws org.xml.sax.SAXException
125       */
126      public void startElement(
127              String namespaceURI, String localName, String name, Attributes atts)
128                throws org.xml.sax.SAXException
129      {
130        // time to fire off startElement event
131        if (m_tracer != null) {
132            super.fireStartElem(name);
133            this.firePseudoAttributes();
134        }
135        return;
136      }
137    
138      /**
139       * Receive notification of the end of an element.
140       *
141       * <p>The SAX parser will invoke this method at the end of every
142       * element in the XML document; there will be a corresponding
143       * startElement() event for every endElement() event (even when the
144       * element is empty).</p>
145       *
146       * <p>If the element name has a namespace prefix, the prefix will
147       * still be attached to the name.</p>
148       *
149       *
150       * @param namespaceURI The Namespace URI, or the empty string if the
151       *        element has no Namespace URI or if Namespace
152       *        processing is not being performed.
153       * @param localName The local name (without prefix), or the
154       *        empty string if Namespace processing is not being
155       *        performed.
156       * @param name The qualified name (with prefix), or the
157       *        empty string if qualified names are not available.
158       * @throws org.xml.sax.SAXException Any SAX exception, possibly
159       *            wrapping another exception.
160       *
161       * @throws org.xml.sax.SAXException
162       */
163      public void endElement(String namespaceURI, String localName, String name)
164              throws org.xml.sax.SAXException
165      {
166            if (m_tracer != null)
167                super.fireEndElem(name);           
168      }
169    
170      /**
171       * Receive notification of character data.
172       *
173       * <p>The Parser will call this method to report each chunk of
174       * character data.  SAX parsers may return all contiguous character
175       * data in a single chunk, or they may split it into several
176       * chunks; however, all of the characters in any single event
177       * must come from the same external entity, so that the Locator
178       * provides useful information.</p>
179       *
180       * <p>The application must not attempt to read from the array
181       * outside of the specified range.</p>
182       *
183       * <p>Note that some parsers will report whitespace using the
184       * ignorableWhitespace() method rather than this one (validating
185       * parsers must do so).</p>
186       *
187       * @param ch The characters from the XML document.
188       * @param start The start position in the array.
189       * @param length The number of characters to read from the array.
190       * @throws org.xml.sax.SAXException Any SAX exception, possibly
191       *            wrapping another exception.
192       * @see #ignorableWhitespace
193       * @see org.xml.sax.Locator
194       */
195      public void characters(char ch[], int start, int length)
196              throws org.xml.sax.SAXException
197      {
198    
199        flushPending();    
200        
201        try
202        {
203            if (inTemporaryOutputState()) {
204                /* leave characters un-processed as we are
205                 * creating temporary output, the output generated by
206                 * this serializer will be input to a final serializer 
207                 * later on and it will do the processing in final
208                 * output state (not temporary output state).
209                 * 
210                 * A "temporary" ToTextStream serializer is used to
211                 * evaluate attribute value templates (for example),
212                 * and the result of evaluating such a thing
213                 * is fed into a final serializer later on.
214                 */
215                m_writer.write(ch, start, length);
216            }
217            else {
218                // In final output state we do process the characters!
219                writeNormalizedChars(ch, start, length, m_lineSepUse);
220            }
221                
222            if (m_tracer != null)
223                super.fireCharEvent(ch, start, length);      
224        }
225        catch(IOException ioe)
226        {
227          throw new SAXException(ioe);
228        }
229      }
230    
231      /**
232       * If available, when the disable-output-escaping attribute is used,
233       * output raw text without escaping.
234       *
235       * @param ch The characters from the XML document.
236       * @param start The start position in the array.
237       * @param length The number of characters to read from the array.
238       *
239       * @throws org.xml.sax.SAXException Any SAX exception, possibly
240       *            wrapping another exception.
241       */
242      public void charactersRaw(char ch[], int start, int length)
243              throws org.xml.sax.SAXException
244      {
245    
246        try
247        {
248          writeNormalizedChars(ch, start, length, m_lineSepUse);
249        }
250        catch(IOException ioe)
251        {
252          throw new SAXException(ioe);
253        }
254      }
255      
256        /**
257         * Normalize the characters, but don't escape.  Different from 
258         * SerializerToXML#writeNormalizedChars because it does not attempt to do 
259         * XML escaping at all.
260         *
261         * @param ch The characters from the XML document.
262         * @param start The start position in the array.
263         * @param length The number of characters to read from the array.
264         * @param useLineSep true if the operating systems 
265         * end-of-line separator should be output rather than a new-line character.
266         * 
267         * @throws IOException
268         * @throws org.xml.sax.SAXException
269         */
270        void writeNormalizedChars(
271            final char ch[],
272                final int start,
273                final int length,
274                final boolean useLineSep)
275                throws IOException, org.xml.sax.SAXException 
276        {
277            final String encoding = getEncoding();
278            final java.io.Writer writer = m_writer;
279            final int end = start + length;
280    
281            /* copy a few "constants" before the loop for performance */
282            final char S_LINEFEED = CharInfo.S_LINEFEED;
283    
284            // This for() loop always increments i by one at the end
285            // of the loop.  Additional increments of i adjust for when
286            // two input characters (a high/low UTF16 surrogate pair)
287            // are processed.
288            for (int i = start; i < end; i++) {
289                final char c = ch[i];
290    
291                if (S_LINEFEED == c && useLineSep) {
292                    writer.write(m_lineSep, 0, m_lineSepLen);
293                    // one input char processed
294                } else if (m_encodingInfo.isInEncoding(c)) {
295                    writer.write(c);
296                    // one input char processed    
297                } else if (Encodings.isHighUTF16Surrogate(c)) {
298                    final int codePoint = writeUTF16Surrogate(c, ch, i, end);
299                    if (codePoint != 0) {
300                        // I think we can just emit the message,
301                        // not crash and burn.
302                        final String integralValue = Integer.toString(codePoint);
303                        final String msg = Utils.messages.createMessage(
304                            MsgKey.ER_ILLEGAL_CHARACTER,
305                            new Object[] { integralValue, encoding });
306                          
307                        //Older behavior was to throw the message,
308                        //but newer gentler behavior is to write a message to System.err
309                        //throw new SAXException(msg);
310                        System.err.println(msg);                            
311    
312                    }
313                    i++; // two input chars processed               
314                } else {
315                    // Don't know what to do with this char, it is
316                    // not in the encoding and not a high char in
317                    // a surrogate pair, so write out as an entity ref
318                    if (encoding != null) {
319                        /* The output encoding is known, 
320                         * so somthing is wrong.
321                         */
322    
323                        // not in the encoding, so write out a character reference
324                        writer.write('&');
325                        writer.write('#');
326                        writer.write(Integer.toString(c));
327                        writer.write(';');
328    
329                        // I think we can just emit the message,
330                        // not crash and burn.
331                        final String integralValue = Integer.toString(c);
332                        final String msg = Utils.messages.createMessage(
333                            MsgKey.ER_ILLEGAL_CHARACTER,
334                            new Object[] { integralValue, encoding });
335                          
336                        //Older behavior was to throw the message,
337                        //but newer gentler behavior is to write a message to System.err
338                        //throw new SAXException(msg);
339                        System.err.println(msg); 
340                    } else {
341                        /* The output encoding is not known,
342                         * so just write it out as-is.
343                         */
344                        writer.write(c);
345                    }
346    
347                    // one input char was processed
348                }
349            }
350        }
351    
352      /**
353       * Receive notification of cdata.
354       *
355       * <p>The Parser will call this method to report each chunk of
356       * character data.  SAX parsers may return all contiguous character
357       * data in a single chunk, or they may split it into several
358       * chunks; however, all of the characters in any single event
359       * must come from the same external entity, so that the Locator
360       * provides useful information.</p>
361       *
362       * <p>The application must not attempt to read from the array
363       * outside of the specified range.</p>
364       *
365       * <p>Note that some parsers will report whitespace using the
366       * ignorableWhitespace() method rather than this one (validating
367       * parsers must do so).</p>
368       *
369       * @param ch The characters from the XML document.
370       * @param start The start position in the array.
371       * @param length The number of characters to read from the array.
372       * @throws org.xml.sax.SAXException Any SAX exception, possibly
373       *            wrapping another exception.
374       * @see #ignorableWhitespace
375       * @see org.xml.sax.Locator
376       */
377      public void cdata(char ch[], int start, int length)
378              throws org.xml.sax.SAXException
379      {
380        try
381        {
382            writeNormalizedChars(ch, start, length, m_lineSepUse);
383            if (m_tracer != null)
384                super.fireCDATAEvent(ch, start, length);              
385        }
386        catch(IOException ioe)
387        {
388          throw new SAXException(ioe);
389        }
390      }
391    
392      /**
393       * Receive notification of ignorable whitespace in element content.
394       *
395       * <p>Validating Parsers must use this method to report each chunk
396       * of ignorable whitespace (see the W3C XML 1.0 recommendation,
397       * section 2.10): non-validating parsers may also use this method
398       * if they are capable of parsing and using content models.</p>
399       *
400       * <p>SAX parsers may return all contiguous whitespace in a single
401       * chunk, or they may split it into several chunks; however, all of
402       * the characters in any single event must come from the same
403       * external entity, so that the Locator provides useful
404       * information.</p>
405       *
406       * <p>The application must not attempt to read from the array
407       * outside of the specified range.</p>
408       *
409       * @param ch The characters from the XML document.
410       * @param start The start position in the array.
411       * @param length The number of characters to read from the array.
412       * @throws org.xml.sax.SAXException Any SAX exception, possibly
413       *            wrapping another exception.
414       * @see #characters
415       *
416       * @throws org.xml.sax.SAXException
417       */
418      public void ignorableWhitespace(char ch[], int start, int length)
419              throws org.xml.sax.SAXException
420      {
421    
422        try
423        {
424          writeNormalizedChars(ch, start, length, m_lineSepUse);
425        }
426        catch(IOException ioe)
427        {
428          throw new SAXException(ioe);
429        }
430      }
431    
432      /**
433       * Receive notification of a processing instruction.
434       *
435       * <p>The Parser will invoke this method once for each processing
436       * instruction found: note that processing instructions may occur
437       * before or after the main document element.</p>
438       *
439       * <p>A SAX parser should never report an XML declaration (XML 1.0,
440       * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
441       * using this method.</p>
442       *
443       * @param target The processing instruction target.
444       * @param data The processing instruction data, or null if
445       *        none was supplied.
446       * @throws org.xml.sax.SAXException Any SAX exception, possibly
447       *            wrapping another exception.
448       *
449       * @throws org.xml.sax.SAXException
450       */
451      public void processingInstruction(String target, String data)
452              throws org.xml.sax.SAXException
453      {
454        // flush anything pending first
455        flushPending();  
456        
457        if (m_tracer != null)
458            super.fireEscapingEvent(target, data);  
459      }
460    
461      /**
462       * Called when a Comment is to be constructed.
463       * Note that Xalan will normally invoke the other version of this method.
464       * %REVIEW% In fact, is this one ever needed, or was it a mistake?
465       *
466       * @param   data  The comment data.
467       * @throws org.xml.sax.SAXException Any SAX exception, possibly
468       *            wrapping another exception.
469       */
470      public void comment(String data) throws org.xml.sax.SAXException
471      {
472          final int length = data.length();
473          if (length > m_charsBuff.length)
474          {
475              m_charsBuff = new char[length*2 + 1];
476          }
477          data.getChars(0, length, m_charsBuff, 0);
478          comment(m_charsBuff, 0, length);
479      }
480    
481      /**
482       * Report an XML comment anywhere in the document.
483       *
484       * This callback will be used for comments inside or outside the
485       * document element, including comments in the external DTD
486       * subset (if read).
487       *
488       * @param ch An array holding the characters in the comment.
489       * @param start The starting position in the array.
490       * @param length The number of characters to use from the array.
491       * @throws org.xml.sax.SAXException The application may raise an exception.
492       */
493      public void comment(char ch[], int start, int length)
494              throws org.xml.sax.SAXException
495      {
496    
497        flushPending();
498        if (m_tracer != null)
499            super.fireCommentEvent(ch, start, length);
500      }
501    
502      /**
503       * Receive notivication of a entityReference.
504       *
505       * @param name non-null reference to the name of the entity.
506       *
507       * @throws org.xml.sax.SAXException
508       */
509      public void entityReference(String name) throws org.xml.sax.SAXException
510      {
511            if (m_tracer != null)
512                super.fireEntityReference(name);    
513      }
514      
515        /**
516         * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
517         */
518        public void addAttribute(
519            String uri,
520            String localName,
521            String rawName,
522            String type,
523            String value,
524            boolean XSLAttribute)
525        {
526            // do nothing, just forget all about the attribute
527        }
528     
529        /**
530         * @see org.xml.sax.ext.LexicalHandler#endCDATA()
531         */
532        public void endCDATA() throws SAXException
533        {
534            // do nothing
535        }
536    
537        /**
538         * @see ExtendedContentHandler#endElement(String)
539         */
540        public void endElement(String elemName) throws SAXException
541        {
542            if (m_tracer != null)
543                super.fireEndElem(elemName);                       
544        }
545     
546        /**
547         * From XSLTC
548         */
549        public void startElement(
550        String elementNamespaceURI,
551        String elementLocalName,
552        String elementName) 
553        throws SAXException 
554        {
555            if (m_needToCallStartDocument)
556                startDocumentInternal();        
557            // time to fire off startlement event.
558            if (m_tracer != null) {
559                super.fireStartElem(elementName);
560                this.firePseudoAttributes();
561            }
562            
563            return;
564        }
565    
566    
567        /**
568         * From XSLTC
569         */
570        public void characters(String characters) 
571        throws SAXException 
572        { 
573            final int length = characters.length();
574            if (length > m_charsBuff.length)
575            {
576                m_charsBuff = new char[length*2 + 1];
577            }
578            characters.getChars(0, length, m_charsBuff, 0);
579            characters(m_charsBuff, 0, length); 
580        }
581    
582    
583        /**
584         * From XSLTC
585         */
586        public void addAttribute(String name, String value)
587        {
588            // do nothing, forget about the attribute
589        }
590        
591        /**
592         * Add a unique attribute
593         */
594        public void addUniqueAttribute(String qName, String value, int flags)
595            throws SAXException
596        {
597            // do nothing, forget about the attribute 
598        }
599    
600        public boolean startPrefixMapping(
601            String prefix,
602            String uri,
603            boolean shouldFlush)
604            throws SAXException
605        {
606            // no namespace support for HTML
607            return false;
608        }
609    
610    
611        public void startPrefixMapping(String prefix, String uri)
612            throws org.xml.sax.SAXException
613        {
614            // no namespace support for HTML
615        }
616    
617    
618        public void namespaceAfterStartElement(
619            final String prefix,
620            final String uri)
621            throws SAXException
622        {
623            // no namespace support for HTML
624        }    
625    
626        public void flushPending() throws org.xml.sax.SAXException
627        {
628                if (m_needToCallStartDocument)
629                {
630                    startDocumentInternal();
631                    m_needToCallStartDocument = false;
632                }
633        }
634    }