001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: FastStringBuffer.java 469279 2006-10-30 21:18:02Z minchau $
020 */
021 package org.apache.xml.utils;
022
023 /**
024 * Bare-bones, unsafe, fast string buffer. No thread-safety, no
025 * parameter range checking, exposed fields. Note that in typical
026 * applications, thread-safety of a StringBuffer is a somewhat
027 * dubious concept in any case.
028 * <p>
029 * Note that Stree and DTM used a single FastStringBuffer as a string pool,
030 * by recording start and length indices within this single buffer. This
031 * minimizes heap overhead, but of course requires more work when retrieving
032 * the data.
033 * <p>
034 * FastStringBuffer operates as a "chunked buffer". Doing so
035 * reduces the need to recopy existing information when an append
036 * exceeds the space available; we just allocate another chunk and
037 * flow across to it. (The array of chunks may need to grow,
038 * admittedly, but that's a much smaller object.) Some excess
039 * recopying may arise when we extract Strings which cross chunk
040 * boundaries; larger chunks make that less frequent.
041 * <p>
042 * The size values are parameterized, to allow tuning this code. In
043 * theory, Result Tree Fragments might want to be tuned differently
044 * from the main document's text.
045 * <p>
046 * %REVIEW% An experiment in self-tuning is
047 * included in the code (using nested FastStringBuffers to achieve
048 * variation in chunk sizes), but this implementation has proven to
049 * be problematic when data may be being copied from the FSB into itself.
050 * We should either re-architect that to make this safe (if possible)
051 * or remove that code and clean up for performance/maintainability reasons.
052 * <p>
053 */
054 public class FastStringBuffer
055 {
056 // If nonzero, forces the inial chunk size.
057 /**/static final int DEBUG_FORCE_INIT_BITS=0;
058
059 // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
060 // back into the same FSB (variable set from previous variable, for example)
061 // and blocksize changes in mid-copy... there's risk of severe malfunction in
062 // the read process, due to how the resizing code re-jiggers storage. Arggh.
063 // If we want to retain the variable-size-block feature, we need to reconsider
064 // that issue. For now, I have forced us into fixed-size mode.
065 static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
066
067 /** Manifest constant: Suppress leading whitespace.
068 * This should be used when normalize-to-SAX is called for the first chunk of a
069 * multi-chunk output, or one following unsuppressed whitespace in a previous
070 * chunk.
071 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
072 */
073 public static final int SUPPRESS_LEADING_WS=0x01;
074
075 /** Manifest constant: Suppress trailing whitespace.
076 * This should be used when normalize-to-SAX is called for the last chunk of a
077 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
078 */
079 public static final int SUPPRESS_TRAILING_WS=0x02;
080
081 /** Manifest constant: Suppress both leading and trailing whitespace.
082 * This should be used when normalize-to-SAX is called for a complete string.
083 * (I'm not wild about the name of this one. Ideas welcome.)
084 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
085 */
086 public static final int SUPPRESS_BOTH
087 = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
088
089 /** Manifest constant: Carry trailing whitespace of one chunk as leading
090 * whitespace of the next chunk. Used internally; I don't see any reason
091 * to make it public right now.
092 */
093 private static final int CARRY_WS=0x04;
094
095 /**
096 * Field m_chunkBits sets our chunking strategy, by saying how many
097 * bits of index can be used within a single chunk before flowing over
098 * to the next chunk. For example, if m_chunkbits is set to 15, each
099 * chunk can contain up to 2^15 (32K) characters
100 */
101 int m_chunkBits = 15;
102
103 /**
104 * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
105 * the largest permissible chunk size is in this particular FastStringBuffer
106 * hierarchy.
107 */
108 int m_maxChunkBits = 15;
109
110 /**
111 * Field m_rechunkBits affects our chunk-growth strategy, by saying how
112 * many chunks should be allocated at one size before we encapsulate them
113 * into the first chunk of the next size up. For example, if m_rechunkBits
114 * is set to 3, then after 8 chunks at a given size we will rebundle
115 * them as the first element of a FastStringBuffer using a chunk size
116 * 8 times larger (chunkBits shifted left three bits).
117 */
118 int m_rebundleBits = 2;
119
120 /**
121 * Field m_chunkSize establishes the maximum size of one chunk of the array
122 * as 2**chunkbits characters.
123 * (Which may also be the minimum size if we aren't tuning for storage)
124 */
125 int m_chunkSize; // =1<<(m_chunkBits-1);
126
127 /**
128 * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
129 * worth of low-order '1' bits, useful for shift-and-mask addressing
130 * within the chunks.
131 */
132 int m_chunkMask; // =m_chunkSize-1;
133
134 /**
135 * Field m_array holds the string buffer's text contents, using an
136 * array-of-arrays. Note that this array, and the arrays it contains, may be
137 * reallocated when necessary in order to allow the buffer to grow;
138 * references to them should be considered to be invalidated after any
139 * append. However, the only time these arrays are directly exposed
140 * is in the sendSAXcharacters call.
141 */
142 char[][] m_array;
143
144 /**
145 * Field m_lastChunk is an index into m_array[], pointing to the last
146 * chunk of the Chunked Array currently in use. Note that additional
147 * chunks may actually be allocated, eg if the FastStringBuffer had
148 * previously been truncated or if someone issued an ensureSpace request.
149 * <p>
150 * The insertion point for append operations is addressed by the combination
151 * of m_lastChunk and m_firstFree.
152 */
153 int m_lastChunk = 0;
154
155 /**
156 * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
157 * the first character in the Chunked Array which is not part of the
158 * FastStringBuffer's current content. Since m_array[][] is zero-based,
159 * the length of that content can be calculated as
160 * (m_lastChunk<<m_chunkBits) + m_firstFree
161 */
162 int m_firstFree = 0;
163
164 /**
165 * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
166 * length equals m_chunkSize, and which replaces m_array[0]. This allows
167 * building a hierarchy of FastStringBuffers, where early appends use
168 * a smaller chunkSize (for less wasted memory overhead) but later
169 * ones use a larger chunkSize (for less heap activity overhead).
170 */
171 FastStringBuffer m_innerFSB = null;
172
173 /**
174 * Construct a FastStringBuffer, with allocation policy as per parameters.
175 * <p>
176 * For coding convenience, I've expressed both allocation sizes in terms of
177 * a number of bits. That's needed for the final size of a chunk,
178 * to permit fast and efficient shift-and-mask addressing. It's less critical
179 * for the inital size, and may be reconsidered.
180 * <p>
181 * An alternative would be to accept integer sizes and round to powers of two;
182 * that really doesn't seem to buy us much, if anything.
183 *
184 * @param initChunkBits Length in characters of the initial allocation
185 * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
186 * characters.) Later chunks will use larger allocation units, to trade off
187 * allocation speed of large document against storage efficiency of small
188 * ones.
189 * @param maxChunkBits Number of character-offset bits that should be used for
190 * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
191 * characters.
192 * @param rebundleBits Number of character-offset bits that addressing should
193 * advance before we attempt to take a step from initChunkBits to maxChunkBits
194 */
195 public FastStringBuffer(int initChunkBits, int maxChunkBits,
196 int rebundleBits)
197 {
198 if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
199
200 // %REVIEW%
201 // Should this force to larger value, or smaller? Smaller less efficient, but if
202 // someone requested variable mode it's because they care about storage space.
203 // On the other hand, given the other changes I'm making, odds are that we should
204 // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
205 // anyway; we need a permanant solution.
206 //
207 if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
208 //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
209
210 m_array = new char[16][];
211
212 // Don't bite off more than we're prepared to swallow!
213 if (initChunkBits > maxChunkBits)
214 initChunkBits = maxChunkBits;
215
216 m_chunkBits = initChunkBits;
217 m_maxChunkBits = maxChunkBits;
218 m_rebundleBits = rebundleBits;
219 m_chunkSize = 1 << (initChunkBits);
220 m_chunkMask = m_chunkSize - 1;
221 m_array[0] = new char[m_chunkSize];
222 }
223
224 /**
225 * Construct a FastStringBuffer, using a default rebundleBits value.
226 *
227 * NEEDSDOC @param initChunkBits
228 * NEEDSDOC @param maxChunkBits
229 */
230 public FastStringBuffer(int initChunkBits, int maxChunkBits)
231 {
232 this(initChunkBits, maxChunkBits, 2);
233 }
234
235 /**
236 * Construct a FastStringBuffer, using default maxChunkBits and
237 * rebundleBits values.
238 * <p>
239 * ISSUE: Should this call assert initial size, or fixed size?
240 * Now configured as initial, with a default for fixed.
241 *
242 * NEEDSDOC @param initChunkBits
243 */
244 public FastStringBuffer(int initChunkBits)
245 {
246 this(initChunkBits, 15, 2);
247 }
248
249 /**
250 * Construct a FastStringBuffer, using a default allocation policy.
251 */
252 public FastStringBuffer()
253 {
254
255 // 10 bits is 1K. 15 bits is 32K. Remember that these are character
256 // counts, so actual memory allocation unit is doubled for UTF-16 chars.
257 //
258 // For reference: In the original FastStringBuffer, we simply
259 // overallocated by blocksize (default 1KB) on each buffer-growth.
260 this(10, 15, 2);
261 }
262
263 /**
264 * Get the length of the list. Synonym for length().
265 *
266 * @return the number of characters in the FastStringBuffer's content.
267 */
268 public final int size()
269 {
270 return (m_lastChunk << m_chunkBits) + m_firstFree;
271 }
272
273 /**
274 * Get the length of the list. Synonym for size().
275 *
276 * @return the number of characters in the FastStringBuffer's content.
277 */
278 public final int length()
279 {
280 return (m_lastChunk << m_chunkBits) + m_firstFree;
281 }
282
283 /**
284 * Discard the content of the FastStringBuffer, and most of the memory
285 * that was allocated by it, restoring the initial state. Note that this
286 * may eventually be different from setLength(0), which see.
287 */
288 public final void reset()
289 {
290
291 m_lastChunk = 0;
292 m_firstFree = 0;
293
294 // Recover the original chunk size
295 FastStringBuffer innermost = this;
296
297 while (innermost.m_innerFSB != null)
298 {
299 innermost = innermost.m_innerFSB;
300 }
301
302 m_chunkBits = innermost.m_chunkBits;
303 m_chunkSize = innermost.m_chunkSize;
304 m_chunkMask = innermost.m_chunkMask;
305
306 // Discard the hierarchy
307 m_innerFSB = null;
308 m_array = new char[16][0];
309 m_array[0] = new char[m_chunkSize];
310 }
311
312 /**
313 * Directly set how much of the FastStringBuffer's storage is to be
314 * considered part of its content. This is a fast but hazardous
315 * operation. It is not protected against negative values, or values
316 * greater than the amount of storage currently available... and even
317 * if additional storage does exist, its contents are unpredictable.
318 * The only safe use for our setLength() is to truncate the FastStringBuffer
319 * to a shorter string.
320 *
321 * @param l New length. If l<0 or l>=getLength(), this operation will
322 * not report an error but future operations will almost certainly fail.
323 */
324 public final void setLength(int l)
325 {
326 m_lastChunk = l >>> m_chunkBits;
327
328 if (m_lastChunk == 0 && m_innerFSB != null)
329 {
330 // Replace this FSB with the appropriate inner FSB, truncated
331 m_innerFSB.setLength(l, this);
332 }
333 else
334 {
335 m_firstFree = l & m_chunkMask;
336
337 // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
338 // us pointing at the start of a chunk which has not yet been allocated. Rather than
339 // pay the cost of dealing with that in the append loops (more scattered and more
340 // inner-loop), we correct it here by moving to the safe side of that
341 // line -- as we would have left the indexes had we appended up to that point.
342 if(m_firstFree==0 && m_lastChunk>0)
343 {
344 --m_lastChunk;
345 m_firstFree=m_chunkSize;
346 }
347 }
348 }
349
350 /**
351 * Subroutine for the public setLength() method. Deals with the fact
352 * that truncation may require restoring one of the innerFSBs
353 *
354 * NEEDSDOC @param l
355 * NEEDSDOC @param rootFSB
356 */
357 private final void setLength(int l, FastStringBuffer rootFSB)
358 {
359
360 m_lastChunk = l >>> m_chunkBits;
361
362 if (m_lastChunk == 0 && m_innerFSB != null)
363 {
364 m_innerFSB.setLength(l, rootFSB);
365 }
366 else
367 {
368
369 // Undo encapsulation -- pop the innerFSB data back up to root.
370 // Inefficient, but attempts to keep the code simple.
371 rootFSB.m_chunkBits = m_chunkBits;
372 rootFSB.m_maxChunkBits = m_maxChunkBits;
373 rootFSB.m_rebundleBits = m_rebundleBits;
374 rootFSB.m_chunkSize = m_chunkSize;
375 rootFSB.m_chunkMask = m_chunkMask;
376 rootFSB.m_array = m_array;
377 rootFSB.m_innerFSB = m_innerFSB;
378 rootFSB.m_lastChunk = m_lastChunk;
379
380 // Finally, truncate this sucker.
381 rootFSB.m_firstFree = l & m_chunkMask;
382 }
383 }
384
385 /**
386 * Note that this operation has been somewhat deoptimized by the shift to a
387 * chunked array, as there is no factory method to produce a String object
388 * directly from an array of arrays and hence a double copy is needed.
389 * By using ensureCapacity we hope to minimize the heap overhead of building
390 * the intermediate StringBuffer.
391 * <p>
392 * (It really is a pity that Java didn't design String as a final subclass
393 * of MutableString, rather than having StringBuffer be a separate hierarchy.
394 * We'd avoid a <strong>lot</strong> of double-buffering.)
395 *
396 * @return the contents of the FastStringBuffer as a standard Java string.
397 */
398 public final String toString()
399 {
400
401 int length = (m_lastChunk << m_chunkBits) + m_firstFree;
402
403 return getString(new StringBuffer(length), 0, 0, length).toString();
404 }
405
406 /**
407 * Append a single character onto the FastStringBuffer, growing the
408 * storage if necessary.
409 * <p>
410 * NOTE THAT after calling append(), previously obtained
411 * references to m_array[][] may no longer be valid....
412 * though in fact they should be in this instance.
413 *
414 * @param value character to be appended.
415 */
416 public final void append(char value)
417 {
418
419 char[] chunk;
420
421 // We may have preallocated chunks. If so, all but last should
422 // be at full size.
423
424 if (m_firstFree < m_chunkSize) // Simplified test single-character-fits
425 chunk = m_array[m_lastChunk];
426 else
427 {
428
429 // Extend array?
430 int i = m_array.length;
431
432 if (m_lastChunk + 1 == i)
433 {
434 char[][] newarray = new char[i + 16][];
435
436 System.arraycopy(m_array, 0, newarray, 0, i);
437
438 m_array = newarray;
439 }
440
441 // Advance one chunk
442 chunk = m_array[++m_lastChunk];
443
444 if (chunk == null)
445 {
446
447 // Hierarchical encapsulation
448 if (m_lastChunk == 1 << m_rebundleBits
449 && m_chunkBits < m_maxChunkBits)
450 {
451
452 // Should do all the work of both encapsulating
453 // existing data and establishing new sizes/offsets
454 m_innerFSB = new FastStringBuffer(this);
455 }
456
457 // Add a chunk.
458 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
459 }
460
461 m_firstFree = 0;
462 }
463
464 // Space exists in the chunk. Append the character.
465 chunk[m_firstFree++] = value;
466 }
467
468 /**
469 * Append the contents of a String onto the FastStringBuffer,
470 * growing the storage if necessary.
471 * <p>
472 * NOTE THAT after calling append(), previously obtained
473 * references to m_array[] may no longer be valid.
474 *
475 * @param value String whose contents are to be appended.
476 */
477 public final void append(String value)
478 {
479
480 if (value == null)
481 return;
482 int strlen = value.length();
483
484 if (0 == strlen)
485 return;
486
487 int copyfrom = 0;
488 char[] chunk = m_array[m_lastChunk];
489 int available = m_chunkSize - m_firstFree;
490
491 // Repeat while data remains to be copied
492 while (strlen > 0)
493 {
494
495 // Copy what fits
496 if (available > strlen)
497 available = strlen;
498
499 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
500 m_firstFree);
501
502 strlen -= available;
503 copyfrom += available;
504
505 // If there's more left, allocate another chunk and continue
506 if (strlen > 0)
507 {
508
509 // Extend array?
510 int i = m_array.length;
511
512 if (m_lastChunk + 1 == i)
513 {
514 char[][] newarray = new char[i + 16][];
515
516 System.arraycopy(m_array, 0, newarray, 0, i);
517
518 m_array = newarray;
519 }
520
521 // Advance one chunk
522 chunk = m_array[++m_lastChunk];
523
524 if (chunk == null)
525 {
526
527 // Hierarchical encapsulation
528 if (m_lastChunk == 1 << m_rebundleBits
529 && m_chunkBits < m_maxChunkBits)
530 {
531
532 // Should do all the work of both encapsulating
533 // existing data and establishing new sizes/offsets
534 m_innerFSB = new FastStringBuffer(this);
535 }
536
537 // Add a chunk.
538 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
539 }
540
541 available = m_chunkSize;
542 m_firstFree = 0;
543 }
544 }
545
546 // Adjust the insert point in the last chunk, when we've reached it.
547 m_firstFree += available;
548 }
549
550 /**
551 * Append the contents of a StringBuffer onto the FastStringBuffer,
552 * growing the storage if necessary.
553 * <p>
554 * NOTE THAT after calling append(), previously obtained
555 * references to m_array[] may no longer be valid.
556 *
557 * @param value StringBuffer whose contents are to be appended.
558 */
559 public final void append(StringBuffer value)
560 {
561
562 if (value == null)
563 return;
564 int strlen = value.length();
565
566 if (0 == strlen)
567 return;
568
569 int copyfrom = 0;
570 char[] chunk = m_array[m_lastChunk];
571 int available = m_chunkSize - m_firstFree;
572
573 // Repeat while data remains to be copied
574 while (strlen > 0)
575 {
576
577 // Copy what fits
578 if (available > strlen)
579 available = strlen;
580
581 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
582 m_firstFree);
583
584 strlen -= available;
585 copyfrom += available;
586
587 // If there's more left, allocate another chunk and continue
588 if (strlen > 0)
589 {
590
591 // Extend array?
592 int i = m_array.length;
593
594 if (m_lastChunk + 1 == i)
595 {
596 char[][] newarray = new char[i + 16][];
597
598 System.arraycopy(m_array, 0, newarray, 0, i);
599
600 m_array = newarray;
601 }
602
603 // Advance one chunk
604 chunk = m_array[++m_lastChunk];
605
606 if (chunk == null)
607 {
608
609 // Hierarchical encapsulation
610 if (m_lastChunk == 1 << m_rebundleBits
611 && m_chunkBits < m_maxChunkBits)
612 {
613
614 // Should do all the work of both encapsulating
615 // existing data and establishing new sizes/offsets
616 m_innerFSB = new FastStringBuffer(this);
617 }
618
619 // Add a chunk.
620 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
621 }
622
623 available = m_chunkSize;
624 m_firstFree = 0;
625 }
626 }
627
628 // Adjust the insert point in the last chunk, when we've reached it.
629 m_firstFree += available;
630 }
631
632 /**
633 * Append part of the contents of a Character Array onto the
634 * FastStringBuffer, growing the storage if necessary.
635 * <p>
636 * NOTE THAT after calling append(), previously obtained
637 * references to m_array[] may no longer be valid.
638 *
639 * @param chars character array from which data is to be copied
640 * @param start offset in chars of first character to be copied,
641 * zero-based.
642 * @param length number of characters to be copied
643 */
644 public final void append(char[] chars, int start, int length)
645 {
646
647 int strlen = length;
648
649 if (0 == strlen)
650 return;
651
652 int copyfrom = start;
653 char[] chunk = m_array[m_lastChunk];
654 int available = m_chunkSize - m_firstFree;
655
656 // Repeat while data remains to be copied
657 while (strlen > 0)
658 {
659
660 // Copy what fits
661 if (available > strlen)
662 available = strlen;
663
664 System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
665 available);
666
667 strlen -= available;
668 copyfrom += available;
669
670 // If there's more left, allocate another chunk and continue
671 if (strlen > 0)
672 {
673
674 // Extend array?
675 int i = m_array.length;
676
677 if (m_lastChunk + 1 == i)
678 {
679 char[][] newarray = new char[i + 16][];
680
681 System.arraycopy(m_array, 0, newarray, 0, i);
682
683 m_array = newarray;
684 }
685
686 // Advance one chunk
687 chunk = m_array[++m_lastChunk];
688
689 if (chunk == null)
690 {
691
692 // Hierarchical encapsulation
693 if (m_lastChunk == 1 << m_rebundleBits
694 && m_chunkBits < m_maxChunkBits)
695 {
696
697 // Should do all the work of both encapsulating
698 // existing data and establishing new sizes/offsets
699 m_innerFSB = new FastStringBuffer(this);
700 }
701
702 // Add a chunk.
703 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
704 }
705
706 available = m_chunkSize;
707 m_firstFree = 0;
708 }
709 }
710
711 // Adjust the insert point in the last chunk, when we've reached it.
712 m_firstFree += available;
713 }
714
715 /**
716 * Append the contents of another FastStringBuffer onto
717 * this FastStringBuffer, growing the storage if necessary.
718 * <p>
719 * NOTE THAT after calling append(), previously obtained
720 * references to m_array[] may no longer be valid.
721 *
722 * @param value FastStringBuffer whose contents are
723 * to be appended.
724 */
725 public final void append(FastStringBuffer value)
726 {
727
728 // Complicating factor here is that the two buffers may use
729 // different chunk sizes, and even if they're the same we're
730 // probably on a different alignment due to previously appended
731 // data. We have to work through the source in bite-sized chunks.
732 if (value == null)
733 return;
734 int strlen = value.length();
735
736 if (0 == strlen)
737 return;
738
739 int copyfrom = 0;
740 char[] chunk = m_array[m_lastChunk];
741 int available = m_chunkSize - m_firstFree;
742
743 // Repeat while data remains to be copied
744 while (strlen > 0)
745 {
746
747 // Copy what fits
748 if (available > strlen)
749 available = strlen;
750
751 int sourcechunk = (copyfrom + value.m_chunkSize - 1)
752 >>> value.m_chunkBits;
753 int sourcecolumn = copyfrom & value.m_chunkMask;
754 int runlength = value.m_chunkSize - sourcecolumn;
755
756 if (runlength > available)
757 runlength = available;
758
759 System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
760 m_array[m_lastChunk], m_firstFree, runlength);
761
762 if (runlength != available)
763 System.arraycopy(value.m_array[sourcechunk + 1], 0,
764 m_array[m_lastChunk], m_firstFree + runlength,
765 available - runlength);
766
767 strlen -= available;
768 copyfrom += available;
769
770 // If there's more left, allocate another chunk and continue
771 if (strlen > 0)
772 {
773
774 // Extend array?
775 int i = m_array.length;
776
777 if (m_lastChunk + 1 == i)
778 {
779 char[][] newarray = new char[i + 16][];
780
781 System.arraycopy(m_array, 0, newarray, 0, i);
782
783 m_array = newarray;
784 }
785
786 // Advance one chunk
787 chunk = m_array[++m_lastChunk];
788
789 if (chunk == null)
790 {
791
792 // Hierarchical encapsulation
793 if (m_lastChunk == 1 << m_rebundleBits
794 && m_chunkBits < m_maxChunkBits)
795 {
796
797 // Should do all the work of both encapsulating
798 // existing data and establishing new sizes/offsets
799 m_innerFSB = new FastStringBuffer(this);
800 }
801
802 // Add a chunk.
803 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
804 }
805
806 available = m_chunkSize;
807 m_firstFree = 0;
808 }
809 }
810
811 // Adjust the insert point in the last chunk, when we've reached it.
812 m_firstFree += available;
813 }
814
815 /**
816 * @return true if the specified range of characters are all whitespace,
817 * as defined by XMLCharacterRecognizer.
818 * <p>
819 * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
820 *
821 * @param start Offset of first character in the range.
822 * @param length Number of characters to send.
823 */
824 public boolean isWhitespace(int start, int length)
825 {
826
827 int sourcechunk = start >>> m_chunkBits;
828 int sourcecolumn = start & m_chunkMask;
829 int available = m_chunkSize - sourcecolumn;
830 boolean chunkOK;
831
832 while (length > 0)
833 {
834 int runlength = (length <= available) ? length : available;
835
836 if (sourcechunk == 0 && m_innerFSB != null)
837 chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
838 else
839 chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
840 m_array[sourcechunk], sourcecolumn, runlength);
841
842 if (!chunkOK)
843 return false;
844
845 length -= runlength;
846
847 ++sourcechunk;
848
849 sourcecolumn = 0;
850 available = m_chunkSize;
851 }
852
853 return true;
854 }
855
856 /**
857 * @param start Offset of first character in the range.
858 * @param length Number of characters to send.
859 * @return a new String object initialized from the specified range of
860 * characters.
861 */
862 public String getString(int start, int length)
863 {
864 int startColumn = start & m_chunkMask;
865 int startChunk = start >>> m_chunkBits;
866 if (startColumn + length < m_chunkMask && m_innerFSB == null) {
867 return getOneChunkString(startChunk, startColumn, length);
868 }
869 return getString(new StringBuffer(length), startChunk, startColumn,
870 length).toString();
871 }
872
873 protected String getOneChunkString(int startChunk, int startColumn,
874 int length) {
875 return new String(m_array[startChunk], startColumn, length);
876 }
877
878 /**
879 * @param sb StringBuffer to be appended to
880 * @param start Offset of first character in the range.
881 * @param length Number of characters to send.
882 * @return sb with the requested text appended to it
883 */
884 StringBuffer getString(StringBuffer sb, int start, int length)
885 {
886 return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
887 }
888
889 /**
890 * Internal support for toString() and getString().
891 * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
892 * and returns a StringBuffer supplied by the caller. This simplifies
893 * m_innerFSB support.
894 * <p>
895 * Note that this operation has been somewhat deoptimized by the shift to a
896 * chunked array, as there is no factory method to produce a String object
897 * directly from an array of arrays and hence a double copy is needed.
898 * By presetting length we hope to minimize the heap overhead of building
899 * the intermediate StringBuffer.
900 * <p>
901 * (It really is a pity that Java didn't design String as a final subclass
902 * of MutableString, rather than having StringBuffer be a separate hierarchy.
903 * We'd avoid a <strong>lot</strong> of double-buffering.)
904 *
905 *
906 * @param sb
907 * @param startChunk
908 * @param startColumn
909 * @param length
910 *
911 * @return the contents of the FastStringBuffer as a standard Java string.
912 */
913 StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
914 int length)
915 {
916
917 int stop = (startChunk << m_chunkBits) + startColumn + length;
918 int stopChunk = stop >>> m_chunkBits;
919 int stopColumn = stop & m_chunkMask;
920
921 // Factored out
922 //StringBuffer sb=new StringBuffer(length);
923 for (int i = startChunk; i < stopChunk; ++i)
924 {
925 if (i == 0 && m_innerFSB != null)
926 m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
927 else
928 sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
929
930 startColumn = 0; // after first chunk
931 }
932
933 if (stopChunk == 0 && m_innerFSB != null)
934 m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
935 else if (stopColumn > startColumn)
936 sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
937
938 return sb;
939 }
940
941 /**
942 * Get a single character from the string buffer.
943 *
944 *
945 * @param pos character position requested.
946 * @return A character from the requested position.
947 */
948 public char charAt(int pos)
949 {
950 int startChunk = pos >>> m_chunkBits;
951
952 if (startChunk == 0 && m_innerFSB != null)
953 return m_innerFSB.charAt(pos & m_chunkMask);
954 else
955 return m_array[startChunk][pos & m_chunkMask];
956 }
957
958 /**
959 * Sends the specified range of characters as one or more SAX characters()
960 * events.
961 * Note that the buffer reference passed to the ContentHandler may be
962 * invalidated if the FastStringBuffer is edited; it's the user's
963 * responsibility to manage access to the FastStringBuffer to prevent this
964 * problem from arising.
965 * <p>
966 * Note too that there is no promise that the output will be sent as a
967 * single call. As is always true in SAX, one logical string may be split
968 * across multiple blocks of memory and hence delivered as several
969 * successive events.
970 *
971 * @param ch SAX ContentHandler object to receive the event.
972 * @param start Offset of first character in the range.
973 * @param length Number of characters to send.
974 * @exception org.xml.sax.SAXException may be thrown by handler's
975 * characters() method.
976 */
977 public void sendSAXcharacters(
978 org.xml.sax.ContentHandler ch, int start, int length)
979 throws org.xml.sax.SAXException
980 {
981
982 int startChunk = start >>> m_chunkBits;
983 int startColumn = start & m_chunkMask;
984 if (startColumn + length < m_chunkMask && m_innerFSB == null) {
985 ch.characters(m_array[startChunk], startColumn, length);
986 return;
987 }
988
989 int stop = start + length;
990 int stopChunk = stop >>> m_chunkBits;
991 int stopColumn = stop & m_chunkMask;
992
993 for (int i = startChunk; i < stopChunk; ++i)
994 {
995 if (i == 0 && m_innerFSB != null)
996 m_innerFSB.sendSAXcharacters(ch, startColumn,
997 m_chunkSize - startColumn);
998 else
999 ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1000
1001 startColumn = 0; // after first chunk
1002 }
1003
1004 // Last, or only, chunk
1005 if (stopChunk == 0 && m_innerFSB != null)
1006 m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1007 else if (stopColumn > startColumn)
1008 {
1009 ch.characters(m_array[stopChunk], startColumn,
1010 stopColumn - startColumn);
1011 }
1012 }
1013
1014 /**
1015 * Sends the specified range of characters as one or more SAX characters()
1016 * events, normalizing the characters according to XSLT rules.
1017 *
1018 * @param ch SAX ContentHandler object to receive the event.
1019 * @param start Offset of first character in the range.
1020 * @param length Number of characters to send.
1021 * @return normalization status to apply to next chunk (because we may
1022 * have been called recursively to process an inner FSB):
1023 * <dl>
1024 * <dt>0</dt>
1025 * <dd>if this output did not end in retained whitespace, and thus whitespace
1026 * at the start of the following chunk (if any) should be converted to a
1027 * single space.
1028 * <dt>SUPPRESS_LEADING_WS</dt>
1029 * <dd>if this output ended in retained whitespace, and thus whitespace
1030 * at the start of the following chunk (if any) should be completely
1031 * suppressed.</dd>
1032 * </dd>
1033 * </dl>
1034 * @exception org.xml.sax.SAXException may be thrown by handler's
1035 * characters() method.
1036 */
1037 public int sendNormalizedSAXcharacters(
1038 org.xml.sax.ContentHandler ch, int start, int length)
1039 throws org.xml.sax.SAXException
1040 {
1041 // This call always starts at the beginning of the
1042 // string being written out, either because it was called directly or
1043 // because it was an m_innerFSB recursion. This is important since
1044 // it gives us a well-known initial state for this flag:
1045 int stateForNextChunk=SUPPRESS_LEADING_WS;
1046
1047 int stop = start + length;
1048 int startChunk = start >>> m_chunkBits;
1049 int startColumn = start & m_chunkMask;
1050 int stopChunk = stop >>> m_chunkBits;
1051 int stopColumn = stop & m_chunkMask;
1052
1053 for (int i = startChunk; i < stopChunk; ++i)
1054 {
1055 if (i == 0 && m_innerFSB != null)
1056 stateForNextChunk=
1057 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1058 m_chunkSize - startColumn);
1059 else
1060 stateForNextChunk=
1061 sendNormalizedSAXcharacters(m_array[i], startColumn,
1062 m_chunkSize - startColumn,
1063 ch,stateForNextChunk);
1064
1065 startColumn = 0; // after first chunk
1066 }
1067
1068 // Last, or only, chunk
1069 if (stopChunk == 0 && m_innerFSB != null)
1070 stateForNextChunk= // %REVIEW% Is this update really needed?
1071 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1072 else if (stopColumn > startColumn)
1073 {
1074 stateForNextChunk= // %REVIEW% Is this update really needed?
1075 sendNormalizedSAXcharacters(m_array[stopChunk],
1076 startColumn, stopColumn - startColumn,
1077 ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1078 }
1079 return stateForNextChunk;
1080 }
1081
1082 static final char[] SINGLE_SPACE = {' '};
1083
1084 /**
1085 * Internal method to directly normalize and dispatch the character array.
1086 * This version is aware of the fact that it may be called several times
1087 * in succession if the data is made up of multiple "chunks", and thus
1088 * must actively manage the handling of leading and trailing whitespace.
1089 *
1090 * Note: The recursion is due to the possible recursion of inner FSBs.
1091 *
1092 * @param ch The characters from the XML document.
1093 * @param start The start position in the array.
1094 * @param length The number of characters to read from the array.
1095 * @param handler SAX ContentHandler object to receive the event.
1096 * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1097 * This is a bitfield contining two flags, bitwise-ORed together:
1098 * <dl>
1099 * <dt>SUPPRESS_LEADING_WS</dt>
1100 * <dd>When false, causes leading whitespace to be converted to a single
1101 * space; when true, causes it to be discarded entirely.
1102 * Should be set TRUE for the first chunk, and (in multi-chunk output)
1103 * whenever the previous chunk ended in retained whitespace.</dd>
1104 * <dt>SUPPRESS_TRAILING_WS</dt>
1105 * <dd>When false, causes trailing whitespace to be converted to a single
1106 * space; when true, causes it to be discarded entirely.
1107 * Should be set TRUE for the last or only chunk.
1108 * </dd>
1109 * </dl>
1110 * @return normalization status, as in the edgeTreatmentFlags parameter:
1111 * <dl>
1112 * <dt>0</dt>
1113 * <dd>if this output did not end in retained whitespace, and thus whitespace
1114 * at the start of the following chunk (if any) should be converted to a
1115 * single space.
1116 * <dt>SUPPRESS_LEADING_WS</dt>
1117 * <dd>if this output ended in retained whitespace, and thus whitespace
1118 * at the start of the following chunk (if any) should be completely
1119 * suppressed.</dd>
1120 * </dd>
1121 * </dl>
1122 *
1123 *
1124 * @exception org.xml.sax.SAXException Any SAX exception, possibly
1125 * wrapping another exception.
1126 */
1127 static int sendNormalizedSAXcharacters(char ch[],
1128 int start, int length,
1129 org.xml.sax.ContentHandler handler,
1130 int edgeTreatmentFlags)
1131 throws org.xml.sax.SAXException
1132 {
1133 boolean processingLeadingWhitespace =
1134 ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1135 boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1136 int currPos = start;
1137 int limit = start+length;
1138
1139 // Strip any leading spaces first, if required
1140 if (processingLeadingWhitespace) {
1141 for (; currPos < limit
1142 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1143 currPos++) { }
1144
1145 // If we've only encountered leading spaces, the
1146 // current state remains unchanged
1147 if (currPos == limit) {
1148 return edgeTreatmentFlags;
1149 }
1150 }
1151
1152 // If we get here, there are no more leading spaces to strip
1153 while (currPos < limit) {
1154 int startNonWhitespace = currPos;
1155
1156 // Grab a chunk of non-whitespace characters
1157 for (; currPos < limit
1158 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1159 currPos++) { }
1160
1161 // Non-whitespace seen - emit them, along with a single
1162 // space for any preceding whitespace characters
1163 if (startNonWhitespace != currPos) {
1164 if (seenWhitespace) {
1165 handler.characters(SINGLE_SPACE, 0, 1);
1166 seenWhitespace = false;
1167 }
1168 handler.characters(ch, startNonWhitespace,
1169 currPos - startNonWhitespace);
1170 }
1171
1172 int startWhitespace = currPos;
1173
1174 // Consume any whitespace characters
1175 for (; currPos < limit
1176 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1177 currPos++) { }
1178
1179 if (startWhitespace != currPos) {
1180 seenWhitespace = true;
1181 }
1182 }
1183
1184 return (seenWhitespace ? CARRY_WS : 0)
1185 | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1186 }
1187
1188 /**
1189 * Directly normalize and dispatch the character array.
1190 *
1191 * @param ch The characters from the XML document.
1192 * @param start The start position in the array.
1193 * @param length The number of characters to read from the array.
1194 * @param handler SAX ContentHandler object to receive the event.
1195 * @exception org.xml.sax.SAXException Any SAX exception, possibly
1196 * wrapping another exception.
1197 */
1198 public static void sendNormalizedSAXcharacters(char ch[],
1199 int start, int length,
1200 org.xml.sax.ContentHandler handler)
1201 throws org.xml.sax.SAXException
1202 {
1203 sendNormalizedSAXcharacters(ch, start, length,
1204 handler, SUPPRESS_BOTH);
1205 }
1206
1207 /**
1208 * Sends the specified range of characters as sax Comment.
1209 * <p>
1210 * Note that, unlike sendSAXcharacters, this has to be done as a single
1211 * call to LexicalHandler#comment.
1212 *
1213 * @param ch SAX LexicalHandler object to receive the event.
1214 * @param start Offset of first character in the range.
1215 * @param length Number of characters to send.
1216 * @exception org.xml.sax.SAXException may be thrown by handler's
1217 * characters() method.
1218 */
1219 public void sendSAXComment(
1220 org.xml.sax.ext.LexicalHandler ch, int start, int length)
1221 throws org.xml.sax.SAXException
1222 {
1223
1224 // %OPT% Do it this way for now...
1225 String comment = getString(start, length);
1226 ch.comment(comment.toCharArray(), 0, length);
1227 }
1228
1229 /**
1230 * Copies characters from this string into the destination character
1231 * array.
1232 *
1233 * @param srcBegin index of the first character in the string
1234 * to copy.
1235 * @param srcEnd index after the last character in the string
1236 * to copy.
1237 * @param dst the destination array.
1238 * @param dstBegin the start offset in the destination array.
1239 * @exception IndexOutOfBoundsException If any of the following
1240 * is true:
1241 * <ul><li><code>srcBegin</code> is negative.
1242 * <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1243 * <li><code>srcEnd</code> is greater than the length of this
1244 * string
1245 * <li><code>dstBegin</code> is negative
1246 * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1247 * <code>dst.length</code></ul>
1248 * @exception NullPointerException if <code>dst</code> is <code>null</code>
1249 */
1250 private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1251 {
1252 // %TBD% Joe needs to write this function. Make public when implemented.
1253 }
1254
1255 /**
1256 * Encapsulation c'tor. After this is called, the source FastStringBuffer
1257 * will be reset to use the new object as its m_innerFSB, and will have
1258 * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1259 * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1260 *
1261 * NEEDSDOC @param source
1262 */
1263 private FastStringBuffer(FastStringBuffer source)
1264 {
1265
1266 // Copy existing information into new encapsulation
1267 m_chunkBits = source.m_chunkBits;
1268 m_maxChunkBits = source.m_maxChunkBits;
1269 m_rebundleBits = source.m_rebundleBits;
1270 m_chunkSize = source.m_chunkSize;
1271 m_chunkMask = source.m_chunkMask;
1272 m_array = source.m_array;
1273 m_innerFSB = source.m_innerFSB;
1274
1275 // These have to be adjusted because we're calling just at the time
1276 // when we would be about to allocate another chunk
1277 m_lastChunk = source.m_lastChunk - 1;
1278 m_firstFree = source.m_chunkSize;
1279
1280 // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1281 source.m_array = new char[16][];
1282 source.m_innerFSB = this;
1283
1284 // Since we encapsulated just as we were about to append another
1285 // chunk, return ready to create the chunk after the innerFSB
1286 // -- 1, not 0.
1287 source.m_lastChunk = 1;
1288 source.m_firstFree = 0;
1289 source.m_chunkBits += m_rebundleBits;
1290 source.m_chunkSize = 1 << (source.m_chunkBits);
1291 source.m_chunkMask = source.m_chunkSize - 1;
1292 }
1293 }