001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $
020 */
021 package org.apache.xml.serializer;
022
023 import java.io.IOException;
024
025 import org.apache.xml.serializer.utils.MsgKey;
026 import org.apache.xml.serializer.utils.Utils;
027 import org.xml.sax.Attributes;
028 import org.xml.sax.SAXException;
029
030 /**
031 * This class is not a public API.
032 * It is only public because it is used in other packages.
033 * This class converts SAX or SAX-like calls to a
034 * serialized document for xsl:output method of "text".
035 * @xsl.usage internal
036 */
037 public class ToTextStream extends ToStream
038 {
039
040
041 /**
042 * Default constructor.
043 */
044 public ToTextStream()
045 {
046 super();
047 }
048
049
050
051 /**
052 * Receive notification of the beginning of a document.
053 *
054 * <p>The SAX parser will invoke this method only once, before any
055 * other methods in this interface or in DTDHandler (except for
056 * setDocumentLocator).</p>
057 *
058 * @throws org.xml.sax.SAXException Any SAX exception, possibly
059 * wrapping another exception.
060 *
061 * @throws org.xml.sax.SAXException
062 */
063 protected void startDocumentInternal() throws org.xml.sax.SAXException
064 {
065 super.startDocumentInternal();
066
067 m_needToCallStartDocument = false;
068
069 // No action for the moment.
070 }
071
072 /**
073 * Receive notification of the end of a document.
074 *
075 * <p>The SAX parser will invoke this method only once, and it will
076 * be the last method invoked during the parse. The parser shall
077 * not invoke this method until it has either abandoned parsing
078 * (because of an unrecoverable error) or reached the end of
079 * input.</p>
080 *
081 * @throws org.xml.sax.SAXException Any SAX exception, possibly
082 * wrapping another exception.
083 *
084 * @throws org.xml.sax.SAXException
085 */
086 public void endDocument() throws org.xml.sax.SAXException
087 {
088 flushPending();
089 flushWriter();
090 if (m_tracer != null)
091 super.fireEndDoc();
092 }
093
094 /**
095 * Receive notification of the beginning of an element.
096 *
097 * <p>The Parser will invoke this method at the beginning of every
098 * element in the XML document; there will be a corresponding
099 * endElement() event for every startElement() event (even when the
100 * element is empty). All of the element's content will be
101 * reported, in order, before the corresponding endElement()
102 * event.</p>
103 *
104 * <p>If the element name has a namespace prefix, the prefix will
105 * still be attached. Note that the attribute list provided will
106 * contain only attributes with explicit values (specified or
107 * defaulted): #IMPLIED attributes will be omitted.</p>
108 *
109 *
110 * @param namespaceURI The Namespace URI, or the empty string if the
111 * element has no Namespace URI or if Namespace
112 * processing is not being performed.
113 * @param localName The local name (without prefix), or the
114 * empty string if Namespace processing is not being
115 * performed.
116 * @param name The qualified name (with prefix), or the
117 * empty string if qualified names are not available.
118 * @param atts The attributes attached to the element, if any.
119 * @throws org.xml.sax.SAXException Any SAX exception, possibly
120 * wrapping another exception.
121 * @see #endElement
122 * @see org.xml.sax.AttributeList
123 *
124 * @throws org.xml.sax.SAXException
125 */
126 public void startElement(
127 String namespaceURI, String localName, String name, Attributes atts)
128 throws org.xml.sax.SAXException
129 {
130 // time to fire off startElement event
131 if (m_tracer != null) {
132 super.fireStartElem(name);
133 this.firePseudoAttributes();
134 }
135 return;
136 }
137
138 /**
139 * Receive notification of the end of an element.
140 *
141 * <p>The SAX parser will invoke this method at the end of every
142 * element in the XML document; there will be a corresponding
143 * startElement() event for every endElement() event (even when the
144 * element is empty).</p>
145 *
146 * <p>If the element name has a namespace prefix, the prefix will
147 * still be attached to the name.</p>
148 *
149 *
150 * @param namespaceURI The Namespace URI, or the empty string if the
151 * element has no Namespace URI or if Namespace
152 * processing is not being performed.
153 * @param localName The local name (without prefix), or the
154 * empty string if Namespace processing is not being
155 * performed.
156 * @param name The qualified name (with prefix), or the
157 * empty string if qualified names are not available.
158 * @throws org.xml.sax.SAXException Any SAX exception, possibly
159 * wrapping another exception.
160 *
161 * @throws org.xml.sax.SAXException
162 */
163 public void endElement(String namespaceURI, String localName, String name)
164 throws org.xml.sax.SAXException
165 {
166 if (m_tracer != null)
167 super.fireEndElem(name);
168 }
169
170 /**
171 * Receive notification of character data.
172 *
173 * <p>The Parser will call this method to report each chunk of
174 * character data. SAX parsers may return all contiguous character
175 * data in a single chunk, or they may split it into several
176 * chunks; however, all of the characters in any single event
177 * must come from the same external entity, so that the Locator
178 * provides useful information.</p>
179 *
180 * <p>The application must not attempt to read from the array
181 * outside of the specified range.</p>
182 *
183 * <p>Note that some parsers will report whitespace using the
184 * ignorableWhitespace() method rather than this one (validating
185 * parsers must do so).</p>
186 *
187 * @param ch The characters from the XML document.
188 * @param start The start position in the array.
189 * @param length The number of characters to read from the array.
190 * @throws org.xml.sax.SAXException Any SAX exception, possibly
191 * wrapping another exception.
192 * @see #ignorableWhitespace
193 * @see org.xml.sax.Locator
194 */
195 public void characters(char ch[], int start, int length)
196 throws org.xml.sax.SAXException
197 {
198
199 flushPending();
200
201 try
202 {
203 if (inTemporaryOutputState()) {
204 /* leave characters un-processed as we are
205 * creating temporary output, the output generated by
206 * this serializer will be input to a final serializer
207 * later on and it will do the processing in final
208 * output state (not temporary output state).
209 *
210 * A "temporary" ToTextStream serializer is used to
211 * evaluate attribute value templates (for example),
212 * and the result of evaluating such a thing
213 * is fed into a final serializer later on.
214 */
215 m_writer.write(ch, start, length);
216 }
217 else {
218 // In final output state we do process the characters!
219 writeNormalizedChars(ch, start, length, m_lineSepUse);
220 }
221
222 if (m_tracer != null)
223 super.fireCharEvent(ch, start, length);
224 }
225 catch(IOException ioe)
226 {
227 throw new SAXException(ioe);
228 }
229 }
230
231 /**
232 * If available, when the disable-output-escaping attribute is used,
233 * output raw text without escaping.
234 *
235 * @param ch The characters from the XML document.
236 * @param start The start position in the array.
237 * @param length The number of characters to read from the array.
238 *
239 * @throws org.xml.sax.SAXException Any SAX exception, possibly
240 * wrapping another exception.
241 */
242 public void charactersRaw(char ch[], int start, int length)
243 throws org.xml.sax.SAXException
244 {
245
246 try
247 {
248 writeNormalizedChars(ch, start, length, m_lineSepUse);
249 }
250 catch(IOException ioe)
251 {
252 throw new SAXException(ioe);
253 }
254 }
255
256 /**
257 * Normalize the characters, but don't escape. Different from
258 * SerializerToXML#writeNormalizedChars because it does not attempt to do
259 * XML escaping at all.
260 *
261 * @param ch The characters from the XML document.
262 * @param start The start position in the array.
263 * @param length The number of characters to read from the array.
264 * @param useLineSep true if the operating systems
265 * end-of-line separator should be output rather than a new-line character.
266 *
267 * @throws IOException
268 * @throws org.xml.sax.SAXException
269 */
270 void writeNormalizedChars(
271 final char ch[],
272 final int start,
273 final int length,
274 final boolean useLineSep)
275 throws IOException, org.xml.sax.SAXException
276 {
277 final String encoding = getEncoding();
278 final java.io.Writer writer = m_writer;
279 final int end = start + length;
280
281 /* copy a few "constants" before the loop for performance */
282 final char S_LINEFEED = CharInfo.S_LINEFEED;
283
284 // This for() loop always increments i by one at the end
285 // of the loop. Additional increments of i adjust for when
286 // two input characters (a high/low UTF16 surrogate pair)
287 // are processed.
288 for (int i = start; i < end; i++) {
289 final char c = ch[i];
290
291 if (S_LINEFEED == c && useLineSep) {
292 writer.write(m_lineSep, 0, m_lineSepLen);
293 // one input char processed
294 } else if (m_encodingInfo.isInEncoding(c)) {
295 writer.write(c);
296 // one input char processed
297 } else if (Encodings.isHighUTF16Surrogate(c)) {
298 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
299 if (codePoint != 0) {
300 // I think we can just emit the message,
301 // not crash and burn.
302 final String integralValue = Integer.toString(codePoint);
303 final String msg = Utils.messages.createMessage(
304 MsgKey.ER_ILLEGAL_CHARACTER,
305 new Object[] { integralValue, encoding });
306
307 //Older behavior was to throw the message,
308 //but newer gentler behavior is to write a message to System.err
309 //throw new SAXException(msg);
310 System.err.println(msg);
311
312 }
313 i++; // two input chars processed
314 } else {
315 // Don't know what to do with this char, it is
316 // not in the encoding and not a high char in
317 // a surrogate pair, so write out as an entity ref
318 if (encoding != null) {
319 /* The output encoding is known,
320 * so somthing is wrong.
321 */
322
323 // not in the encoding, so write out a character reference
324 writer.write('&');
325 writer.write('#');
326 writer.write(Integer.toString(c));
327 writer.write(';');
328
329 // I think we can just emit the message,
330 // not crash and burn.
331 final String integralValue = Integer.toString(c);
332 final String msg = Utils.messages.createMessage(
333 MsgKey.ER_ILLEGAL_CHARACTER,
334 new Object[] { integralValue, encoding });
335
336 //Older behavior was to throw the message,
337 //but newer gentler behavior is to write a message to System.err
338 //throw new SAXException(msg);
339 System.err.println(msg);
340 } else {
341 /* The output encoding is not known,
342 * so just write it out as-is.
343 */
344 writer.write(c);
345 }
346
347 // one input char was processed
348 }
349 }
350 }
351
352 /**
353 * Receive notification of cdata.
354 *
355 * <p>The Parser will call this method to report each chunk of
356 * character data. SAX parsers may return all contiguous character
357 * data in a single chunk, or they may split it into several
358 * chunks; however, all of the characters in any single event
359 * must come from the same external entity, so that the Locator
360 * provides useful information.</p>
361 *
362 * <p>The application must not attempt to read from the array
363 * outside of the specified range.</p>
364 *
365 * <p>Note that some parsers will report whitespace using the
366 * ignorableWhitespace() method rather than this one (validating
367 * parsers must do so).</p>
368 *
369 * @param ch The characters from the XML document.
370 * @param start The start position in the array.
371 * @param length The number of characters to read from the array.
372 * @throws org.xml.sax.SAXException Any SAX exception, possibly
373 * wrapping another exception.
374 * @see #ignorableWhitespace
375 * @see org.xml.sax.Locator
376 */
377 public void cdata(char ch[], int start, int length)
378 throws org.xml.sax.SAXException
379 {
380 try
381 {
382 writeNormalizedChars(ch, start, length, m_lineSepUse);
383 if (m_tracer != null)
384 super.fireCDATAEvent(ch, start, length);
385 }
386 catch(IOException ioe)
387 {
388 throw new SAXException(ioe);
389 }
390 }
391
392 /**
393 * Receive notification of ignorable whitespace in element content.
394 *
395 * <p>Validating Parsers must use this method to report each chunk
396 * of ignorable whitespace (see the W3C XML 1.0 recommendation,
397 * section 2.10): non-validating parsers may also use this method
398 * if they are capable of parsing and using content models.</p>
399 *
400 * <p>SAX parsers may return all contiguous whitespace in a single
401 * chunk, or they may split it into several chunks; however, all of
402 * the characters in any single event must come from the same
403 * external entity, so that the Locator provides useful
404 * information.</p>
405 *
406 * <p>The application must not attempt to read from the array
407 * outside of the specified range.</p>
408 *
409 * @param ch The characters from the XML document.
410 * @param start The start position in the array.
411 * @param length The number of characters to read from the array.
412 * @throws org.xml.sax.SAXException Any SAX exception, possibly
413 * wrapping another exception.
414 * @see #characters
415 *
416 * @throws org.xml.sax.SAXException
417 */
418 public void ignorableWhitespace(char ch[], int start, int length)
419 throws org.xml.sax.SAXException
420 {
421
422 try
423 {
424 writeNormalizedChars(ch, start, length, m_lineSepUse);
425 }
426 catch(IOException ioe)
427 {
428 throw new SAXException(ioe);
429 }
430 }
431
432 /**
433 * Receive notification of a processing instruction.
434 *
435 * <p>The Parser will invoke this method once for each processing
436 * instruction found: note that processing instructions may occur
437 * before or after the main document element.</p>
438 *
439 * <p>A SAX parser should never report an XML declaration (XML 1.0,
440 * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
441 * using this method.</p>
442 *
443 * @param target The processing instruction target.
444 * @param data The processing instruction data, or null if
445 * none was supplied.
446 * @throws org.xml.sax.SAXException Any SAX exception, possibly
447 * wrapping another exception.
448 *
449 * @throws org.xml.sax.SAXException
450 */
451 public void processingInstruction(String target, String data)
452 throws org.xml.sax.SAXException
453 {
454 // flush anything pending first
455 flushPending();
456
457 if (m_tracer != null)
458 super.fireEscapingEvent(target, data);
459 }
460
461 /**
462 * Called when a Comment is to be constructed.
463 * Note that Xalan will normally invoke the other version of this method.
464 * %REVIEW% In fact, is this one ever needed, or was it a mistake?
465 *
466 * @param data The comment data.
467 * @throws org.xml.sax.SAXException Any SAX exception, possibly
468 * wrapping another exception.
469 */
470 public void comment(String data) throws org.xml.sax.SAXException
471 {
472 final int length = data.length();
473 if (length > m_charsBuff.length)
474 {
475 m_charsBuff = new char[length*2 + 1];
476 }
477 data.getChars(0, length, m_charsBuff, 0);
478 comment(m_charsBuff, 0, length);
479 }
480
481 /**
482 * Report an XML comment anywhere in the document.
483 *
484 * This callback will be used for comments inside or outside the
485 * document element, including comments in the external DTD
486 * subset (if read).
487 *
488 * @param ch An array holding the characters in the comment.
489 * @param start The starting position in the array.
490 * @param length The number of characters to use from the array.
491 * @throws org.xml.sax.SAXException The application may raise an exception.
492 */
493 public void comment(char ch[], int start, int length)
494 throws org.xml.sax.SAXException
495 {
496
497 flushPending();
498 if (m_tracer != null)
499 super.fireCommentEvent(ch, start, length);
500 }
501
502 /**
503 * Receive notivication of a entityReference.
504 *
505 * @param name non-null reference to the name of the entity.
506 *
507 * @throws org.xml.sax.SAXException
508 */
509 public void entityReference(String name) throws org.xml.sax.SAXException
510 {
511 if (m_tracer != null)
512 super.fireEntityReference(name);
513 }
514
515 /**
516 * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
517 */
518 public void addAttribute(
519 String uri,
520 String localName,
521 String rawName,
522 String type,
523 String value,
524 boolean XSLAttribute)
525 {
526 // do nothing, just forget all about the attribute
527 }
528
529 /**
530 * @see org.xml.sax.ext.LexicalHandler#endCDATA()
531 */
532 public void endCDATA() throws SAXException
533 {
534 // do nothing
535 }
536
537 /**
538 * @see ExtendedContentHandler#endElement(String)
539 */
540 public void endElement(String elemName) throws SAXException
541 {
542 if (m_tracer != null)
543 super.fireEndElem(elemName);
544 }
545
546 /**
547 * From XSLTC
548 */
549 public void startElement(
550 String elementNamespaceURI,
551 String elementLocalName,
552 String elementName)
553 throws SAXException
554 {
555 if (m_needToCallStartDocument)
556 startDocumentInternal();
557 // time to fire off startlement event.
558 if (m_tracer != null) {
559 super.fireStartElem(elementName);
560 this.firePseudoAttributes();
561 }
562
563 return;
564 }
565
566
567 /**
568 * From XSLTC
569 */
570 public void characters(String characters)
571 throws SAXException
572 {
573 final int length = characters.length();
574 if (length > m_charsBuff.length)
575 {
576 m_charsBuff = new char[length*2 + 1];
577 }
578 characters.getChars(0, length, m_charsBuff, 0);
579 characters(m_charsBuff, 0, length);
580 }
581
582
583 /**
584 * From XSLTC
585 */
586 public void addAttribute(String name, String value)
587 {
588 // do nothing, forget about the attribute
589 }
590
591 /**
592 * Add a unique attribute
593 */
594 public void addUniqueAttribute(String qName, String value, int flags)
595 throws SAXException
596 {
597 // do nothing, forget about the attribute
598 }
599
600 public boolean startPrefixMapping(
601 String prefix,
602 String uri,
603 boolean shouldFlush)
604 throws SAXException
605 {
606 // no namespace support for HTML
607 return false;
608 }
609
610
611 public void startPrefixMapping(String prefix, String uri)
612 throws org.xml.sax.SAXException
613 {
614 // no namespace support for HTML
615 }
616
617
618 public void namespaceAfterStartElement(
619 final String prefix,
620 final String uri)
621 throws SAXException
622 {
623 // no namespace support for HTML
624 }
625
626 public void flushPending() throws org.xml.sax.SAXException
627 {
628 if (m_needToCallStartDocument)
629 {
630 startDocumentInternal();
631 m_needToCallStartDocument = false;
632 }
633 }
634 }