Xalan-C++ API Reference  1.12.0
XalanUTF8Writer.hpp
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 #if !defined(XALANUTF8WRITER_HEADER_GUARD_1357924680)
19 #define XALANUTF8WRITER_HEADER_GUARD_1357924680
20 
21 
23 
24 
25 
26 namespace XALAN_CPP_NAMESPACE {
27 
28 
29 
30 inline char
31 bits19to21(XalanUnicodeChar theChar)
32 {
33  return static_cast<char>((theChar >> 18) & 0x7);
34 }
35 
36 
37 
38 inline char
39 bits13to18(XalanUnicodeChar theChar)
40 {
41  return static_cast<char>((theChar >> 12) & 0x3F);
42 }
43 
44 
45 
46 inline char
47 bits13to16(XalanUnicodeChar theChar)
48 {
49  return static_cast<char>((theChar >> 12) & 0xF);
50 }
51 
52 
53 
54 inline char
55 bits7to12(XalanUnicodeChar theChar)
56 {
57  return static_cast<char>((theChar >> 6) & 0x3f);
58 }
59 
60 
61 
62 inline char
63 bits7to11(XalanUnicodeChar theChar)
64 {
65  return static_cast<char>((theChar >> 6) & 0x1f);
66 }
67 
68 
69 
70 inline char
71 bits1to6(XalanUnicodeChar theChar)
72 {
73  return static_cast<char>(theChar & 0x3f);
74 }
75 
76 
77 
/**
 * Build the first byte of a two-byte sequence by adding the supplied
 * payload bits to the 0xC0 marker.
 *
 * @param theBits The payload bits for the leading byte.
 * @return 0xC0 + theBits, converted to char.
 */
inline char
leadingByteOf2(char theBits)
{
    const int   lead = 0xC0 + theBits;

    return static_cast<char>(lead);
}
83 
84 
85 
/**
 * Build the first byte of a three-byte sequence by adding the supplied
 * payload bits to the 0xE0 marker.
 *
 * @param theBits The payload bits for the leading byte.
 * @return 0xE0 + theBits, converted to char.
 */
inline char
leadingByteOf3(char theBits)
{
    const int   lead = 0xE0 + theBits;

    return static_cast<char>(lead);
}
91 
92 
93 
/**
 * Build the first byte of a four-byte sequence by adding the supplied
 * payload bits to the 0xF0 marker.
 *
 * @param theBits The payload bits for the leading byte.
 * @return 0xF0 + theBits, converted to char.
 */
inline char
leadingByteOf4(char theBits)
{
    const int   lead = 0xF0 + theBits;

    return static_cast<char>(lead);
}
99 
100 
101 
/**
 * Build a continuation byte by adding the supplied payload bits to the
 * 0x80 marker.
 *
 * @param theBits The payload bits for the continuation byte.
 * @return 0x80 + theBits, converted to char.
 */
inline char
trailingByte(char theBits)
{
    const int   continuation = 0x80 + theBits;

    return static_cast<char>(continuation);
}
107 
108 
109 
class XalanUTF8Writer : public XalanFormatterWriter
{
112 public:
113 
114  typedef char value_type;
115 
116 
118  Writer& writer,
119  MemoryManager& theMemoryManager);
120 
121  virtual
123  {
124  }
125 
126  /**
127  * Output a line break.
128  */
129  void
131  {
132  assert(m_newlineString != 0);
133  assert(length(m_newlineString) == m_newlineStringLength);
134 
135  write(
136  m_newlineString,
137  m_newlineStringLength);
138  }
139 
140  size_type
142  const XalanDOMChar chars[],
143  size_type start,
145  bool& /* outsideCDATA */)
146  {
147  assert(chars != 0 && length != 0 && start < length);
148 
149  return write(chars, start, length);
150  }
151 
152  /**
153  * Writes name characters. If characters that are not
154  * representable are encountered, an exception is thrown.
155  */
156  void
158  const XalanDOMChar* data,
159  size_type theLength)
160  {
161  write(data, theLength);
162  }
163 
164  /**
165  * Writes PI characters. If characters that are not
166  * representable are encountered, an exception is thrown.
167  */
168  void
170  const XalanDOMChar* data,
171  size_type theLength)
172  {
173  write(data, theLength);
174  }
175 
176  /**
177  * Writes comment characters. If characters that are not
178  * representable are encountered, an exception is thrown.
179  */
180  void
182  const XalanDOMChar* data,
183  size_type theLength)
184  {
185  write(data, theLength);
186  }
187 
188  void
190  const XalanDOMChar* theChars,
191  size_type theLength)
192  {
193  for(size_type i = 0; i < theLength; ++i)
194  {
195  write(value_type(theChars[i]));
196  }
197  }
198 
199  void
201  const value_type* theChars,
202  size_type theLength)
203  {
204  #if defined(NDEBUG)
205  if (theLength > sizeof(m_buffer))
206  {
207  flushBuffer();
208 
209  m_writer.write(theChars, 0, theLength);
210  }
211  else
212  {
213  if (m_bufferRemaining < theLength)
214  {
215  flushBuffer();
216  }
217 
218  for(size_type i = 0; i < theLength; ++i)
219  {
220  *m_bufferPosition = theChars[i];
221 
222  ++m_bufferPosition;
223  }
224 
225  m_bufferRemaining -= theLength;
226  }
227  #else
228  for(size_type i = 0; i < theLength; ++i)
229  {
230  write(theChars[i]);
231  }
232  #endif
233  }
234 
235  void
236  write(const XalanDOMChar* theChars)
237  {
238  write(theChars, XalanDOMString::length(theChars));
239  }
240 
241  void
242  write(const XalanDOMString& theChars)
243  {
244  write(theChars.c_str(), theChars.length());
245  }
246 
247  void
248  write(value_type theChar)
249  {
250  assert(theChar < 128);
251 
252  if (m_bufferRemaining == 0)
253  {
254  flushBuffer();
255  }
256 
257  *m_bufferPosition = theChar;
258 
259  ++m_bufferPosition;
260  --m_bufferRemaining;
261  }
262 
263  void
265  const XalanDOMChar* theChars,
266  size_type theLength)
267  {
268  for(size_type i = 0; i < theLength; ++i)
269  {
270  if (isUTF16HighSurrogate(theChars[i]) == false)
271  {
272  write(static_cast<XalanUnicodeChar>(theChars[i]));
273  }
274  else if (i + 1 >= theLength)
275  {
276  throwInvalidUTF16SurrogateException(
277  theChars[i],
278  0,
279  getMemoryManager());
280  }
281  else
282  {
283  write(
284  decodeUTF16SurrogatePair(
285  theChars[i],
286  theChars[i + 1],
287  getMemoryManager()));
288 
289  ++i;
290  }
291  }
292  }
293 
294  size_type
296  const XalanDOMChar chars[],
297  size_type start,
299  {
300  const XalanDOMChar ch = chars[start];
301 
302  if (isUTF16HighSurrogate(ch) == false)
303  {
304  write(static_cast<XalanUnicodeChar>(ch));
305  }
306  else if (start + 1 >= length)
307  {
308  throwInvalidUTF16SurrogateException(
309  ch,
310  0,
311  getMemoryManager());
312  }
313  else
314  {
315  write(
316  decodeUTF16SurrogatePair(
317  ch,
318  chars[++start],
319  getMemoryManager()));
320  }
321 
322  return start;
323  }
324 
325  void
327  const XalanDOMChar* theChars,
328  size_type theLength)
329  {
330  for(size_type i = 0; i < theLength; ++i)
331  {
332  const XalanDOMChar ch = theChars[i];
333 
334  if (isUTF16HighSurrogate(ch) == true)
335  {
336  if (i + 1 >= theLength)
337  {
338  throwInvalidUTF16SurrogateException(
339  ch,
340  0,
341  getMemoryManager());
342  }
343  else
344  {
345  write(
346  decodeUTF16SurrogatePair(
347  ch,
348  theChars[i + 1],
349  getMemoryManager()));
350 
351  ++i;
352  }
353  }
354  else
355  {
356  write(static_cast<XalanUnicodeChar>(ch));
357  }
358  }
359 
360  }
361 
362  void
363  write(const value_type* theChars)
364  {
365  write(theChars, XalanDOMString::length(theChars));
366  }
367 
368  void
370  {
371  m_writer.flush();
372  }
373 
374  void
376  {
377  m_writer.write(m_buffer, 0, m_bufferPosition - m_buffer);
378 
379  m_bufferPosition = m_buffer;
380  m_bufferRemaining = kBufferSize;
381  }
382 
383 private:
384 
385  void
386  write(XalanUnicodeChar theChar)
387  {
388  if (theChar <= 0x7F)
389  {
390  write(char(theChar));
391  }
392  else if (theChar <= 0x7FF)
393  {
394  if (m_bufferRemaining < 2)
395  {
396  flushBuffer();
397  }
398 
399  *m_bufferPosition = leadingByteOf2(bits7to11(theChar));
400  ++m_bufferPosition;
401  *m_bufferPosition = trailingByte(bits1to6(theChar));
402  ++m_bufferPosition;
403 
404  m_bufferRemaining -= 2;
405  }
406  else if (theChar <= 0xFFFF)
407  {
408  // We should never get a high or low surrogate here...
409  assert(theChar < 0xD800 || theChar > 0xDBFF);
410  assert(theChar < 0xDC00 || theChar > 0xDFFF);
411 
412  if (m_bufferRemaining < 3)
413  {
414  flushBuffer();
415  }
416 
417  *m_bufferPosition = leadingByteOf3(bits13to16(theChar));
418  ++m_bufferPosition;
419  *m_bufferPosition = trailingByte(bits7to12(theChar));
420  ++m_bufferPosition;
421  *m_bufferPosition = trailingByte(bits1to6(theChar));
422  ++m_bufferPosition;
423 
424  m_bufferRemaining -= 3;
425  }
426  else if (theChar <= 0x10FFFF)
427  {
428  if (m_bufferRemaining < 4)
429  {
430  flushBuffer();
431  }
432 
433  *m_bufferPosition = leadingByteOf4(bits19to21(theChar));
434  ++m_bufferPosition;
435  *m_bufferPosition = trailingByte(bits13to18(theChar));
436  ++m_bufferPosition;
437  *m_bufferPosition = trailingByte(bits7to12(theChar));
438  ++m_bufferPosition;
439  *m_bufferPosition = trailingByte(bits1to6(theChar));
440  ++m_bufferPosition;
441 
442  m_bufferRemaining -= 4;
443  }
444  else
445  {
446  throwInvalidCharacterException(theChar, getMemoryManager());
447  }
448  }
449 
/** Number of value_type elements held by m_buffer. */
enum
{
    kBufferSize = 512
};
454 
455 
456  // Data members...
457  value_type m_buffer[kBufferSize];
458 
459  value_type* m_bufferPosition;
460 
461  size_type m_bufferRemaining;
462 };
463 
464 
465 
466 }
467 
468 
469 
470 #endif // XALANUTF8WRITER_HEADER_GUARD_1357924680
xalanc::XalanUTF8Writer::write
void write(const XalanDOMString &theChars)
Definition: XalanUTF8Writer.hpp:242
XALAN_CPP_NAMESPACE
#define XALAN_CPP_NAMESPACE
Xalan-C++ namespace, including major and minor version.
Definition: XalanVersion.hpp:76
xalanc::XalanUTF8Writer::write
void write(value_type theChar)
Definition: XalanUTF8Writer.hpp:248
xalanc::XalanUTF8Writer::write
size_type write(const XalanDOMChar chars[], size_type start, size_type length)
Definition: XalanUTF8Writer.hpp:295
xalanc::XalanUTF8Writer::writeCDATAChar
size_type writeCDATAChar(const XalanDOMChar chars[], size_type start, size_type length, bool &)
Definition: XalanUTF8Writer.hpp:141
xalanc::XalanUTF8Writer::flushBuffer
void flushBuffer()
Definition: XalanUTF8Writer.hpp:375
xalanc::leadingByteOf2
char leadingByteOf2(char theBits)
Definition: XalanUTF8Writer.hpp:79
xalanc::XalanUTF8Writer::writeNameChar
void writeNameChar(const XalanDOMChar *data, size_type theLength)
Writes name characters.
Definition: XalanUTF8Writer.hpp:157
xalanc::bits19to21
char bits19to21(XalanUnicodeChar theChar)
Definition: XalanUTF8Writer.hpp:31
xalanc::bits7to12
char bits7to12(XalanUnicodeChar theChar)
Definition: XalanUTF8Writer.hpp:55
xalanc::XalanUTF8Writer
Definition: XalanUTF8Writer.hpp:110
xalanc::XalanUTF8Writer::write
void write(const value_type *theChars, size_type theLength)
Definition: XalanUTF8Writer.hpp:200
xalanc::size_type
size_t size_type
Definition: XalanMap.hpp:46
xalanc::XalanUTF8Writer::write
void write(const XalanDOMChar *theChars, size_type theLength)
Definition: XalanUTF8Writer.hpp:264
xalanc::bits13to16
char bits13to16(XalanUnicodeChar theChar)
Definition: XalanUTF8Writer.hpp:47
xalanc::XalanUTF8Writer::write
void write(const value_type *theChars)
Definition: XalanUTF8Writer.hpp:363
xalanc::XalanUTF8Writer::flushWriter
void flushWriter()
Definition: XalanUTF8Writer.hpp:369
xalanc::XalanUTF8Writer::value_type
char value_type
Definition: XalanUTF8Writer.hpp:114
xalanc::XalanUTF8Writer::writePIChars
void writePIChars(const XalanDOMChar *data, size_type theLength)
Writes PI characters.
Definition: XalanUTF8Writer.hpp:169
xalanc::XalanDOMString::length
size_type length() const
Definition: XalanDOMString.hpp:209
xalanc::XalanUTF8Writer::write
void write(const XalanDOMChar *theChars)
Definition: XalanUTF8Writer.hpp:236
xalanc::XalanFormatterWriter::size_type
FormatterListener::size_type size_type
Definition: XalanFormatterWriter.hpp:48
xalanc::length
XalanDOMString::size_type length(const XalanDOMString &theString)
Get the length of a XalanDOMString.
Definition: DOMStringHelper.hpp:235
XalanFormatterWriter.hpp
xalanc::bits13to18
char bits13to18(XalanUnicodeChar theChar)
Definition: XalanUTF8Writer.hpp:39
xalanc::XalanUTF8Writer::safeWriteContent
void safeWriteContent(const XalanDOMChar *theChars, size_type theLength)
Definition: XalanUTF8Writer.hpp:189
xalanc::XalanFormatterWriter
Definition: XalanFormatterWriter.hpp:44
xalanc::XalanUTF8Writer::writeCommentChars
void writeCommentChars(const XalanDOMChar *data, size_type theLength)
Writes comment characters.
Definition: XalanUTF8Writer.hpp:181
xalanc::XalanUTF8Writer::~XalanUTF8Writer
virtual ~XalanUTF8Writer()
Definition: XalanUTF8Writer.hpp:122
xalanc::bits7to11
char bits7to11(XalanUnicodeChar theChar)
Definition: XalanUTF8Writer.hpp:63
xalanc::XalanDOMString::c_str
const XalanDOMChar * c_str() const
Definition: XalanDOMString.hpp:344
xalanc::XalanUTF8Writer::outputNewline
void outputNewline()
Output a line break.
Definition: XalanUTF8Writer.hpp:130
xalanc::trailingByte
char trailingByte(char theBits)
Definition: XalanUTF8Writer.hpp:103
xalanc::XalanDOMString
Definition: XalanDOMString.hpp:45
xalanc::XalanUTF8Writer::writeSafe
void writeSafe(const XalanDOMChar *theChars, size_type theLength)
Definition: XalanUTF8Writer.hpp:326
xalanc::leadingByteOf3
char leadingByteOf3(char theBits)
Definition: XalanUTF8Writer.hpp:87
xalanc::leadingByteOf4
char leadingByteOf4(char theBits)
Definition: XalanUTF8Writer.hpp:95
xalanc::bits1to6
char bits1to6(XalanUnicodeChar theChar)
Definition: XalanUTF8Writer.hpp:71
xalanc::Writer
Definition: Writer.hpp:44