001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: FastStringBuffer.java 469279 2006-10-30 21:18:02Z minchau $
020     */
021    package org.apache.xml.utils;
022    
023    /**
024     * Bare-bones, unsafe, fast string buffer. No thread-safety, no
025     * parameter range checking, exposed fields. Note that in typical
026     * applications, thread-safety of a StringBuffer is a somewhat
027     * dubious concept in any case.
028     * <p>
029     * Note that Stree and DTM used a single FastStringBuffer as a string pool,
030     * by recording start and length indices within this single buffer. This
031     * minimizes heap overhead, but of course requires more work when retrieving
032     * the data.
033     * <p>
034     * FastStringBuffer operates as a "chunked buffer". Doing so
035     * reduces the need to recopy existing information when an append
036     * exceeds the space available; we just allocate another chunk and
037     * flow across to it. (The array of chunks may need to grow,
038     * admittedly, but that's a much smaller object.) Some excess
039     * recopying may arise when we extract Strings which cross chunk
040     * boundaries; larger chunks make that less frequent.
041     * <p>
042     * The size values are parameterized, to allow tuning this code. In
043     * theory, Result Tree Fragments might want to be tuned differently 
044     * from the main document's text. 
045     * <p>
046     * %REVIEW% An experiment in self-tuning is
047     * included in the code (using nested FastStringBuffers to achieve
048     * variation in chunk sizes), but this implementation has proven to
049     * be problematic when data may be being copied from the FSB into itself.
050     * We should either re-architect that to make this safe (if possible)
051     * or remove that code and clean up for performance/maintainability reasons.
052     * <p>
053     */
054    public class FastStringBuffer
055    {
056      // If nonzero, forces the inial chunk size.
057      /**/static final int DEBUG_FORCE_INIT_BITS=0;
058      
059            // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
060            // back into the same FSB (variable set from previous variable, for example) 
061            // and blocksize changes in mid-copy... there's risk of severe malfunction in 
062            // the read process, due to how the resizing code re-jiggers storage. Arggh. 
063            // If we want to retain the variable-size-block feature, we need to reconsider 
064            // that issue. For now, I have forced us into fixed-size mode.
065        static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
066    
067            /** Manifest constant: Suppress leading whitespace.
068             * This should be used when normalize-to-SAX is called for the first chunk of a
069             * multi-chunk output, or one following unsuppressed whitespace in a previous
070             * chunk.
071             * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
072             */
073            public static final int SUPPRESS_LEADING_WS=0x01;
074            
075            /** Manifest constant: Suppress trailing whitespace.
076             * This should be used when normalize-to-SAX is called for the last chunk of a
077             * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
078             */
079            public static final int SUPPRESS_TRAILING_WS=0x02;
080            
081            /** Manifest constant: Suppress both leading and trailing whitespace.
082             * This should be used when normalize-to-SAX is called for a complete string.
083             * (I'm not wild about the name of this one. Ideas welcome.)
084             * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
085             */
086            public static final int SUPPRESS_BOTH
087                    = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
088    
089            /** Manifest constant: Carry trailing whitespace of one chunk as leading 
090             * whitespace of the next chunk. Used internally; I don't see any reason
091             * to make it public right now.
092             */
093            private static final int CARRY_WS=0x04;
094    
095            /**
096       * Field m_chunkBits sets our chunking strategy, by saying how many
097       * bits of index can be used within a single chunk before flowing over
098       * to the next chunk. For example, if m_chunkbits is set to 15, each
099       * chunk can contain up to 2^15 (32K) characters  
100       */
101      int m_chunkBits = 15;
102    
103      /**
104       * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
105       * the largest permissible chunk size is in this particular FastStringBuffer
106       * hierarchy. 
107       */
108      int m_maxChunkBits = 15;
109    
110      /**
111       * Field m_rechunkBits affects our chunk-growth strategy, by saying how
112       * many chunks should be allocated at one size before we encapsulate them
113       * into the first chunk of the next size up. For example, if m_rechunkBits
114       * is set to 3, then after 8 chunks at a given size we will rebundle
115       * them as the first element of a FastStringBuffer using a chunk size
116       * 8 times larger (chunkBits shifted left three bits).
117       */
118      int m_rebundleBits = 2;
119    
120      /**
121       * Field m_chunkSize establishes the maximum size of one chunk of the array
122       * as 2**chunkbits characters.
123       * (Which may also be the minimum size if we aren't tuning for storage) 
124       */
125      int m_chunkSize;  // =1<<(m_chunkBits-1);
126    
127      /**
128       * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
129       * worth of low-order '1' bits, useful for shift-and-mask addressing
130       * within the chunks. 
131       */
132      int m_chunkMask;  // =m_chunkSize-1;
133    
134      /**
135       * Field m_array holds the string buffer's text contents, using an
136       * array-of-arrays. Note that this array, and the arrays it contains, may be
137       * reallocated when necessary in order to allow the buffer to grow;
138       * references to them should be considered to be invalidated after any
139       * append. However, the only time these arrays are directly exposed
140       * is in the sendSAXcharacters call.
141       */
142      char[][] m_array;
143    
144      /**
145       * Field m_lastChunk is an index into m_array[], pointing to the last
146       * chunk of the Chunked Array currently in use. Note that additional
147       * chunks may actually be allocated, eg if the FastStringBuffer had
148       * previously been truncated or if someone issued an ensureSpace request.
149       * <p>
150       * The insertion point for append operations is addressed by the combination
151       * of m_lastChunk and m_firstFree.
152       */
153      int m_lastChunk = 0;
154    
155      /**
156       * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
157       * the first character in the Chunked Array which is not part of the
158       * FastStringBuffer's current content. Since m_array[][] is zero-based,
159       * the length of that content can be calculated as
160       * (m_lastChunk<<m_chunkBits) + m_firstFree 
161       */
162      int m_firstFree = 0;
163    
164      /**
165       * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
166       * length equals m_chunkSize, and which replaces m_array[0]. This allows
167       * building a hierarchy of FastStringBuffers, where early appends use
168       * a smaller chunkSize (for less wasted memory overhead) but later
169       * ones use a larger chunkSize (for less heap activity overhead).
170       */
171      FastStringBuffer m_innerFSB = null;
172    
173      /**
174       * Construct a FastStringBuffer, with allocation policy as per parameters.
175       * <p>
176       * For coding convenience, I've expressed both allocation sizes in terms of
177       * a number of bits. That's needed for the final size of a chunk,
178       * to permit fast and efficient shift-and-mask addressing. It's less critical
179       * for the inital size, and may be reconsidered.
180       * <p>
181       * An alternative would be to accept integer sizes and round to powers of two;
182       * that really doesn't seem to buy us much, if anything.
183       *
184       * @param initChunkBits Length in characters of the initial allocation
185       * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
186       * characters.) Later chunks will use larger allocation units, to trade off
187       * allocation speed of large document against storage efficiency of small
188       * ones.
189       * @param maxChunkBits Number of character-offset bits that should be used for
190       * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
191       * characters.
192       * @param rebundleBits Number of character-offset bits that addressing should
193       * advance before we attempt to take a step from initChunkBits to maxChunkBits
194       */
195      public FastStringBuffer(int initChunkBits, int maxChunkBits,
196                              int rebundleBits)
197      {
198        if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
199        
200        // %REVIEW%
201        // Should this force to larger value, or smaller? Smaller less efficient, but if
202        // someone requested variable mode it's because they care about storage space.
203        // On the other hand, given the other changes I'm making, odds are that we should
204        // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
205        // anyway; we need a permanant solution.
206        //
207        if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
208        //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
209    
210        m_array = new char[16][];
211    
212        // Don't bite off more than we're prepared to swallow!
213        if (initChunkBits > maxChunkBits)
214          initChunkBits = maxChunkBits;
215    
216        m_chunkBits = initChunkBits;
217        m_maxChunkBits = maxChunkBits;
218        m_rebundleBits = rebundleBits;
219        m_chunkSize = 1 << (initChunkBits);
220        m_chunkMask = m_chunkSize - 1;
221        m_array[0] = new char[m_chunkSize];
222      }
223    
224      /**
225       * Construct a FastStringBuffer, using a default rebundleBits value.
226       *
227       * NEEDSDOC @param initChunkBits
228       * NEEDSDOC @param maxChunkBits
229       */
230      public FastStringBuffer(int initChunkBits, int maxChunkBits)
231      {
232        this(initChunkBits, maxChunkBits, 2);
233      }
234    
235      /**
236       * Construct a FastStringBuffer, using default maxChunkBits and
237       * rebundleBits values.
238       * <p>
239       * ISSUE: Should this call assert initial size, or fixed size?
240       * Now configured as initial, with a default for fixed.
241       *
242       * NEEDSDOC @param initChunkBits
243       */
244      public FastStringBuffer(int initChunkBits)
245      {
246        this(initChunkBits, 15, 2);
247      }
248    
249      /**
250       * Construct a FastStringBuffer, using a default allocation policy.
251       */
252      public FastStringBuffer()
253      {
254    
255        // 10 bits is 1K. 15 bits is 32K. Remember that these are character
256        // counts, so actual memory allocation unit is doubled for UTF-16 chars.
257        //
258        // For reference: In the original FastStringBuffer, we simply
259        // overallocated by blocksize (default 1KB) on each buffer-growth.
260        this(10, 15, 2);
261      }
262    
263      /**
264       * Get the length of the list. Synonym for length().
265       *
266       * @return the number of characters in the FastStringBuffer's content.
267       */
268      public final int size()
269      {
270        return (m_lastChunk << m_chunkBits) + m_firstFree;
271      }
272    
273      /**
274       * Get the length of the list. Synonym for size().
275       *
276       * @return the number of characters in the FastStringBuffer's content.
277       */
278      public final int length()
279      {
280        return (m_lastChunk << m_chunkBits) + m_firstFree;
281      }
282    
283      /**
284       * Discard the content of the FastStringBuffer, and most of the memory
285       * that was allocated by it, restoring the initial state. Note that this
286       * may eventually be different from setLength(0), which see.
287       */
288      public final void reset()
289      {
290    
291        m_lastChunk = 0;
292        m_firstFree = 0;
293    
294        // Recover the original chunk size
295        FastStringBuffer innermost = this;
296    
297        while (innermost.m_innerFSB != null)
298        {
299          innermost = innermost.m_innerFSB;
300        }
301    
302        m_chunkBits = innermost.m_chunkBits;
303        m_chunkSize = innermost.m_chunkSize;
304        m_chunkMask = innermost.m_chunkMask;
305    
306        // Discard the hierarchy
307        m_innerFSB = null;
308        m_array = new char[16][0];
309        m_array[0] = new char[m_chunkSize];
310      }
311    
312      /**
313       * Directly set how much of the FastStringBuffer's storage is to be
314       * considered part of its content. This is a fast but hazardous
315       * operation. It is not protected against negative values, or values
316       * greater than the amount of storage currently available... and even
317       * if additional storage does exist, its contents are unpredictable.
318       * The only safe use for our setLength() is to truncate the FastStringBuffer
319       * to a shorter string.
320       *
321       * @param l New length. If l<0 or l>=getLength(), this operation will
322       * not report an error but future operations will almost certainly fail.
323       */
324      public final void setLength(int l)
325      {
326        m_lastChunk = l >>> m_chunkBits;
327    
328        if (m_lastChunk == 0 && m_innerFSB != null)
329        {
330          // Replace this FSB with the appropriate inner FSB, truncated
331          m_innerFSB.setLength(l, this);
332        }
333        else
334        {
335          m_firstFree = l & m_chunkMask;
336          
337              // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
338              // us pointing at the start of a chunk which has not yet been allocated. Rather than 
339              // pay the cost of dealing with that in the append loops (more scattered and more
340              // inner-loop), we correct it here by moving to the safe side of that
341              // line -- as we would have left the indexes had we appended up to that point.
342          if(m_firstFree==0 && m_lastChunk>0)
343          {
344            --m_lastChunk;
345            m_firstFree=m_chunkSize;
346          }
347        }
348      }
349    
350      /**
351       * Subroutine for the public setLength() method. Deals with the fact
352       * that truncation may require restoring one of the innerFSBs
353       *
354       * NEEDSDOC @param l
355       * NEEDSDOC @param rootFSB
356       */
357      private final void setLength(int l, FastStringBuffer rootFSB)
358      {
359    
360        m_lastChunk = l >>> m_chunkBits;
361    
362        if (m_lastChunk == 0 && m_innerFSB != null)
363        {
364          m_innerFSB.setLength(l, rootFSB);
365        }
366        else
367        {
368    
369          // Undo encapsulation -- pop the innerFSB data back up to root.
370          // Inefficient, but attempts to keep the code simple.
371          rootFSB.m_chunkBits = m_chunkBits;
372          rootFSB.m_maxChunkBits = m_maxChunkBits;
373          rootFSB.m_rebundleBits = m_rebundleBits;
374          rootFSB.m_chunkSize = m_chunkSize;
375          rootFSB.m_chunkMask = m_chunkMask;
376          rootFSB.m_array = m_array;
377          rootFSB.m_innerFSB = m_innerFSB;
378          rootFSB.m_lastChunk = m_lastChunk;
379    
380          // Finally, truncate this sucker.
381          rootFSB.m_firstFree = l & m_chunkMask;
382        }
383      }
384    
385      /**
386       * Note that this operation has been somewhat deoptimized by the shift to a
387       * chunked array, as there is no factory method to produce a String object
388       * directly from an array of arrays and hence a double copy is needed.
389       * By using ensureCapacity we hope to minimize the heap overhead of building
390       * the intermediate StringBuffer.
391       * <p>
392       * (It really is a pity that Java didn't design String as a final subclass
393       * of MutableString, rather than having StringBuffer be a separate hierarchy.
394       * We'd avoid a <strong>lot</strong> of double-buffering.)
395       *
396       * @return the contents of the FastStringBuffer as a standard Java string.
397       */
398      public final String toString()
399      {
400    
401        int length = (m_lastChunk << m_chunkBits) + m_firstFree;
402    
403        return getString(new StringBuffer(length), 0, 0, length).toString();
404      }
405    
406      /**
407       * Append a single character onto the FastStringBuffer, growing the
408       * storage if necessary.
409       * <p>
410       * NOTE THAT after calling append(), previously obtained
411       * references to m_array[][] may no longer be valid....
412       * though in fact they should be in this instance.
413       *
414       * @param value character to be appended.
415       */
416      public final void append(char value)
417      {
418        
419        char[] chunk;
420    
421        // We may have preallocated chunks. If so, all but last should
422        // be at full size.
423    
424        if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
425          chunk = m_array[m_lastChunk];
426        else
427        {
428    
429          // Extend array?
430          int i = m_array.length;
431    
432          if (m_lastChunk + 1 == i)
433          {
434            char[][] newarray = new char[i + 16][];
435    
436            System.arraycopy(m_array, 0, newarray, 0, i);
437    
438            m_array = newarray;
439          }
440    
441          // Advance one chunk
442          chunk = m_array[++m_lastChunk];
443    
444          if (chunk == null)
445          {
446    
447            // Hierarchical encapsulation
448            if (m_lastChunk == 1 << m_rebundleBits
449                    && m_chunkBits < m_maxChunkBits)
450            {
451    
452              // Should do all the work of both encapsulating
453              // existing data and establishing new sizes/offsets
454              m_innerFSB = new FastStringBuffer(this);
455            }
456    
457            // Add a chunk.
458            chunk = m_array[m_lastChunk] = new char[m_chunkSize];
459          }
460    
461          m_firstFree = 0;
462        }
463    
464        // Space exists in the chunk. Append the character.
465        chunk[m_firstFree++] = value;
466      }
467    
468      /**
469       * Append the contents of a String onto the FastStringBuffer,
470       * growing the storage if necessary.
471       * <p>
472       * NOTE THAT after calling append(), previously obtained
473       * references to m_array[] may no longer be valid.
474       *
475       * @param value String whose contents are to be appended.
476       */
477      public final void append(String value)
478      {
479    
480        if (value == null) 
481          return;
482        int strlen = value.length();
483    
484        if (0 == strlen)
485          return;
486    
487        int copyfrom = 0;
488        char[] chunk = m_array[m_lastChunk];
489        int available = m_chunkSize - m_firstFree;
490    
491        // Repeat while data remains to be copied
492        while (strlen > 0)
493        {
494    
495          // Copy what fits
496          if (available > strlen)
497            available = strlen;
498    
499          value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
500                         m_firstFree);
501    
502          strlen -= available;
503          copyfrom += available;
504    
505          // If there's more left, allocate another chunk and continue
506          if (strlen > 0)
507          {
508    
509            // Extend array?
510            int i = m_array.length;
511    
512            if (m_lastChunk + 1 == i)
513            {
514              char[][] newarray = new char[i + 16][];
515    
516              System.arraycopy(m_array, 0, newarray, 0, i);
517    
518              m_array = newarray;
519            }
520    
521            // Advance one chunk
522            chunk = m_array[++m_lastChunk];
523    
524            if (chunk == null)
525            {
526    
527              // Hierarchical encapsulation
528              if (m_lastChunk == 1 << m_rebundleBits
529                      && m_chunkBits < m_maxChunkBits)
530              {
531    
532                // Should do all the work of both encapsulating
533                // existing data and establishing new sizes/offsets
534                m_innerFSB = new FastStringBuffer(this);
535              }
536    
537              // Add a chunk. 
538              chunk = m_array[m_lastChunk] = new char[m_chunkSize];
539            }
540    
541            available = m_chunkSize;
542            m_firstFree = 0;
543          }
544        }
545    
546        // Adjust the insert point in the last chunk, when we've reached it.
547        m_firstFree += available;
548      }
549    
550      /**
551       * Append the contents of a StringBuffer onto the FastStringBuffer,
552       * growing the storage if necessary.
553       * <p>
554       * NOTE THAT after calling append(), previously obtained
555       * references to m_array[] may no longer be valid.
556       *
557       * @param value StringBuffer whose contents are to be appended.
558       */
559      public final void append(StringBuffer value)
560      {
561    
562        if (value == null) 
563          return;
564        int strlen = value.length();
565    
566        if (0 == strlen)
567          return;
568    
569        int copyfrom = 0;
570        char[] chunk = m_array[m_lastChunk];
571        int available = m_chunkSize - m_firstFree;
572    
573        // Repeat while data remains to be copied
574        while (strlen > 0)
575        {
576    
577          // Copy what fits
578          if (available > strlen)
579            available = strlen;
580    
581          value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
582                         m_firstFree);
583    
584          strlen -= available;
585          copyfrom += available;
586    
587          // If there's more left, allocate another chunk and continue
588          if (strlen > 0)
589          {
590    
591            // Extend array?
592            int i = m_array.length;
593    
594            if (m_lastChunk + 1 == i)
595            {
596              char[][] newarray = new char[i + 16][];
597    
598              System.arraycopy(m_array, 0, newarray, 0, i);
599    
600              m_array = newarray;
601            }
602    
603            // Advance one chunk
604            chunk = m_array[++m_lastChunk];
605    
606            if (chunk == null)
607            {
608    
609              // Hierarchical encapsulation
610              if (m_lastChunk == 1 << m_rebundleBits
611                      && m_chunkBits < m_maxChunkBits)
612              {
613    
614                // Should do all the work of both encapsulating
615                // existing data and establishing new sizes/offsets
616                m_innerFSB = new FastStringBuffer(this);
617              }
618    
619              // Add a chunk.
620              chunk = m_array[m_lastChunk] = new char[m_chunkSize];
621            }
622    
623            available = m_chunkSize;
624            m_firstFree = 0;
625          }
626        }
627    
628        // Adjust the insert point in the last chunk, when we've reached it.
629        m_firstFree += available;
630      }
631    
632      /**
633       * Append part of the contents of a Character Array onto the
634       * FastStringBuffer,  growing the storage if necessary.
635       * <p>
636       * NOTE THAT after calling append(), previously obtained
637       * references to m_array[] may no longer be valid.
638       *
639       * @param chars character array from which data is to be copied
640       * @param start offset in chars of first character to be copied,
641       * zero-based.
642       * @param length number of characters to be copied
643       */
644      public final void append(char[] chars, int start, int length)
645      {
646    
647        int strlen = length;
648    
649        if (0 == strlen)
650          return;
651    
652        int copyfrom = start;
653        char[] chunk = m_array[m_lastChunk];
654        int available = m_chunkSize - m_firstFree;
655    
656        // Repeat while data remains to be copied
657        while (strlen > 0)
658        {
659    
660          // Copy what fits
661          if (available > strlen)
662            available = strlen;
663    
664          System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
665                           available);
666    
667          strlen -= available;
668          copyfrom += available;
669    
670          // If there's more left, allocate another chunk and continue
671          if (strlen > 0)
672          {
673    
674            // Extend array?
675            int i = m_array.length;
676    
677            if (m_lastChunk + 1 == i)
678            {
679              char[][] newarray = new char[i + 16][];
680    
681              System.arraycopy(m_array, 0, newarray, 0, i);
682    
683              m_array = newarray;
684            }
685    
686            // Advance one chunk
687            chunk = m_array[++m_lastChunk];
688    
689            if (chunk == null)
690            {
691    
692              // Hierarchical encapsulation
693              if (m_lastChunk == 1 << m_rebundleBits
694                      && m_chunkBits < m_maxChunkBits)
695              {
696    
697                // Should do all the work of both encapsulating
698                // existing data and establishing new sizes/offsets
699                m_innerFSB = new FastStringBuffer(this);
700              }
701    
702              // Add a chunk.
703              chunk = m_array[m_lastChunk] = new char[m_chunkSize];
704            }
705    
706            available = m_chunkSize;
707            m_firstFree = 0;
708          }
709        }
710    
711        // Adjust the insert point in the last chunk, when we've reached it.
712        m_firstFree += available;
713      }
714    
715      /**
716       * Append the contents of another FastStringBuffer onto
717       * this FastStringBuffer, growing the storage if necessary.
718       * <p>
719       * NOTE THAT after calling append(), previously obtained
720       * references to m_array[] may no longer be valid.
721       *
722       * @param value FastStringBuffer whose contents are
723       * to be appended.
724       */
725      public final void append(FastStringBuffer value)
726      {
727    
728        // Complicating factor here is that the two buffers may use
729        // different chunk sizes, and even if they're the same we're
730        // probably on a different alignment due to previously appended
731        // data. We have to work through the source in bite-sized chunks.
732        if (value == null) 
733          return;
734        int strlen = value.length();
735    
736        if (0 == strlen)
737          return;
738    
739        int copyfrom = 0;
740        char[] chunk = m_array[m_lastChunk];
741        int available = m_chunkSize - m_firstFree;
742    
743        // Repeat while data remains to be copied
744        while (strlen > 0)
745        {
746    
747          // Copy what fits
748          if (available > strlen)
749            available = strlen;
750    
751          int sourcechunk = (copyfrom + value.m_chunkSize - 1)
752                            >>> value.m_chunkBits;
753          int sourcecolumn = copyfrom & value.m_chunkMask;
754          int runlength = value.m_chunkSize - sourcecolumn;
755    
756          if (runlength > available)
757            runlength = available;
758    
759          System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
760                           m_array[m_lastChunk], m_firstFree, runlength);
761    
762          if (runlength != available)
763            System.arraycopy(value.m_array[sourcechunk + 1], 0,
764                             m_array[m_lastChunk], m_firstFree + runlength,
765                             available - runlength);
766    
767          strlen -= available;
768          copyfrom += available;
769    
770          // If there's more left, allocate another chunk and continue
771          if (strlen > 0)
772          {
773    
774            // Extend array?
775            int i = m_array.length;
776    
777            if (m_lastChunk + 1 == i)
778            {
779              char[][] newarray = new char[i + 16][];
780    
781              System.arraycopy(m_array, 0, newarray, 0, i);
782    
783              m_array = newarray;
784            }
785    
786            // Advance one chunk
787            chunk = m_array[++m_lastChunk];
788    
789            if (chunk == null)
790            {
791    
792              // Hierarchical encapsulation
793              if (m_lastChunk == 1 << m_rebundleBits
794                      && m_chunkBits < m_maxChunkBits)
795              {
796    
797                // Should do all the work of both encapsulating
798                // existing data and establishing new sizes/offsets
799                m_innerFSB = new FastStringBuffer(this);
800              }
801    
802              // Add a chunk. 
803              chunk = m_array[m_lastChunk] = new char[m_chunkSize];
804            }
805    
806            available = m_chunkSize;
807            m_firstFree = 0;
808          }
809        }
810    
811        // Adjust the insert point in the last chunk, when we've reached it.
812        m_firstFree += available;
813      }
814    
815      /**
816       * @return true if the specified range of characters are all whitespace,
817       * as defined by XMLCharacterRecognizer.
818       * <p>
819       * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
820       *
821       * @param start Offset of first character in the range.
822       * @param length Number of characters to send.
823       */
824      public boolean isWhitespace(int start, int length)
825      {
826    
827        int sourcechunk = start >>> m_chunkBits;
828        int sourcecolumn = start & m_chunkMask;
829        int available = m_chunkSize - sourcecolumn;
830        boolean chunkOK;
831    
832        while (length > 0)
833        {
834          int runlength = (length <= available) ? length : available;
835    
836          if (sourcechunk == 0 && m_innerFSB != null)
837            chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
838          else
839            chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
840              m_array[sourcechunk], sourcecolumn, runlength);
841    
842          if (!chunkOK)
843            return false;
844    
845          length -= runlength;
846    
847          ++sourcechunk;
848    
849          sourcecolumn = 0;
850          available = m_chunkSize;
851        }
852    
853        return true;
854      }
855    
856      /**
857       * @param start Offset of first character in the range.
858       * @param length Number of characters to send.
859       * @return a new String object initialized from the specified range of
860       * characters.
861       */
862      public String getString(int start, int length)
863      {
864        int startColumn = start & m_chunkMask;
865        int startChunk = start >>> m_chunkBits;
866        if (startColumn + length < m_chunkMask && m_innerFSB == null) {
867          return getOneChunkString(startChunk, startColumn, length);
868        }
869        return getString(new StringBuffer(length), startChunk, startColumn,
870                         length).toString();
871      }
872    
873      protected String getOneChunkString(int startChunk, int startColumn,
874                                         int length) {
875        return new String(m_array[startChunk], startColumn, length);
876      }
877    
878      /**
879       * @param sb StringBuffer to be appended to
880       * @param start Offset of first character in the range.
881       * @param length Number of characters to send.
882       * @return sb with the requested text appended to it
883       */
884      StringBuffer getString(StringBuffer sb, int start, int length)
885      {
886        return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
887      }
888    
889      /**
890       * Internal support for toString() and getString().
891       * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
892       * and returns a StringBuffer supplied by the caller. This simplifies
893       * m_innerFSB support.
894       * <p>
895       * Note that this operation has been somewhat deoptimized by the shift to a
896       * chunked array, as there is no factory method to produce a String object
897       * directly from an array of arrays and hence a double copy is needed.
898       * By presetting length we hope to minimize the heap overhead of building
899       * the intermediate StringBuffer.
900       * <p>
901       * (It really is a pity that Java didn't design String as a final subclass
902       * of MutableString, rather than having StringBuffer be a separate hierarchy.
903       * We'd avoid a <strong>lot</strong> of double-buffering.)
904       *
905       *
906       * @param sb
907       * @param startChunk
908       * @param startColumn
909       * @param length
910       * 
911       * @return the contents of the FastStringBuffer as a standard Java string.
912       */
913      StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
914                             int length)
915      {
916    
917        int stop = (startChunk << m_chunkBits) + startColumn + length;
918        int stopChunk = stop >>> m_chunkBits;
919        int stopColumn = stop & m_chunkMask;
920    
921        // Factored out
922        //StringBuffer sb=new StringBuffer(length);
923        for (int i = startChunk; i < stopChunk; ++i)
924        {
925          if (i == 0 && m_innerFSB != null)
926            m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
927          else
928            sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
929    
930          startColumn = 0;  // after first chunk
931        }
932    
933        if (stopChunk == 0 && m_innerFSB != null)
934          m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
935        else if (stopColumn > startColumn)
936          sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
937    
938        return sb;
939      }
940    
941      /**
942       * Get a single character from the string buffer.
943       *
944       *
945       * @param pos character position requested.
946       * @return A character from the requested position.
947       */
948      public char charAt(int pos)
949      {
950        int startChunk = pos >>> m_chunkBits;
951    
952        if (startChunk == 0 && m_innerFSB != null)
953          return m_innerFSB.charAt(pos & m_chunkMask);
954        else
955          return m_array[startChunk][pos & m_chunkMask];
956      }
957    
958      /**
959       * Sends the specified range of characters as one or more SAX characters()
960       * events.
961       * Note that the buffer reference passed to the ContentHandler may be
962       * invalidated if the FastStringBuffer is edited; it's the user's
963       * responsibility to manage access to the FastStringBuffer to prevent this
964       * problem from arising.
965       * <p>
966       * Note too that there is no promise that the output will be sent as a
967       * single call. As is always true in SAX, one logical string may be split
968       * across multiple blocks of memory and hence delivered as several
969       * successive events.
970       *
971       * @param ch SAX ContentHandler object to receive the event.
972       * @param start Offset of first character in the range.
973       * @param length Number of characters to send.
974       * @exception org.xml.sax.SAXException may be thrown by handler's
975       * characters() method.
976       */
977      public void sendSAXcharacters(
978              org.xml.sax.ContentHandler ch, int start, int length)
979                throws org.xml.sax.SAXException
980      {
981    
982        int startChunk = start >>> m_chunkBits;
983        int startColumn = start & m_chunkMask;
984        if (startColumn + length < m_chunkMask && m_innerFSB == null) {
985            ch.characters(m_array[startChunk], startColumn, length);
986            return;
987        }
988        
989        int stop = start + length;
990        int stopChunk = stop >>> m_chunkBits;
991        int stopColumn = stop & m_chunkMask;
992    
993        for (int i = startChunk; i < stopChunk; ++i)
994        {
995          if (i == 0 && m_innerFSB != null)
996            m_innerFSB.sendSAXcharacters(ch, startColumn,
997                                         m_chunkSize - startColumn);
998          else
999            ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1000    
1001          startColumn = 0;  // after first chunk
1002        }
1003    
1004        // Last, or only, chunk
1005        if (stopChunk == 0 && m_innerFSB != null)
1006          m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1007        else if (stopColumn > startColumn)
1008        {
1009          ch.characters(m_array[stopChunk], startColumn,
1010                        stopColumn - startColumn);
1011        }
1012      }
1013      
1014      /**
1015       * Sends the specified range of characters as one or more SAX characters()
1016       * events, normalizing the characters according to XSLT rules.
1017       *
1018       * @param ch SAX ContentHandler object to receive the event.
1019       * @param start Offset of first character in the range.
1020       * @param length Number of characters to send.
1021       * @return normalization status to apply to next chunk (because we may
1022       * have been called recursively to process an inner FSB):
1023       * <dl>
1024       * <dt>0</dt>
1025       * <dd>if this output did not end in retained whitespace, and thus whitespace
1026       * at the start of the following chunk (if any) should be converted to a
1027       * single space.
1028       * <dt>SUPPRESS_LEADING_WS</dt>
1029       * <dd>if this output ended in retained whitespace, and thus whitespace
1030       * at the start of the following chunk (if any) should be completely
1031       * suppressed.</dd>
1032       * </dd>
1033       * </dl>
1034       * @exception org.xml.sax.SAXException may be thrown by handler's
1035       * characters() method.
1036       */
1037      public int sendNormalizedSAXcharacters(
1038              org.xml.sax.ContentHandler ch, int start, int length)
1039                throws org.xml.sax.SAXException
1040      {
1041            // This call always starts at the beginning of the 
1042        // string being written out, either because it was called directly or
1043        // because it was an m_innerFSB recursion. This is important since
1044            // it gives us a well-known initial state for this flag:
1045            int stateForNextChunk=SUPPRESS_LEADING_WS;
1046    
1047        int stop = start + length;
1048        int startChunk = start >>> m_chunkBits;
1049        int startColumn = start & m_chunkMask;
1050        int stopChunk = stop >>> m_chunkBits;
1051        int stopColumn = stop & m_chunkMask;
1052    
1053        for (int i = startChunk; i < stopChunk; ++i)
1054        {
1055          if (i == 0 && m_innerFSB != null)
1056                                    stateForNextChunk=
1057            m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1058                                         m_chunkSize - startColumn);
1059          else
1060                                    stateForNextChunk=
1061            sendNormalizedSAXcharacters(m_array[i], startColumn, 
1062                                        m_chunkSize - startColumn, 
1063                                                                                                                                                    ch,stateForNextChunk);
1064    
1065          startColumn = 0;  // after first chunk
1066        }
1067    
1068        // Last, or only, chunk
1069        if (stopChunk == 0 && m_innerFSB != null)
1070                            stateForNextChunk= // %REVIEW% Is this update really needed?
1071          m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1072        else if (stopColumn > startColumn)
1073        {
1074                            stateForNextChunk= // %REVIEW% Is this update really needed?
1075          sendNormalizedSAXcharacters(m_array[stopChunk], 
1076                                                                                                                                            startColumn, stopColumn - startColumn,
1077                                                                                                                                            ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1078        }
1079                    return stateForNextChunk;
1080      }
1081      
1082      static final char[] SINGLE_SPACE = {' '};
1083              
1084      /**
1085       * Internal method to directly normalize and dispatch the character array.
1086       * This version is aware of the fact that it may be called several times
1087       * in succession if the data is made up of multiple "chunks", and thus
1088       * must actively manage the handling of leading and trailing whitespace.
1089       * 
1090       * Note: The recursion is due to the possible recursion of inner FSBs.
1091       *
1092       * @param ch The characters from the XML document.
1093       * @param start The start position in the array.
1094       * @param length The number of characters to read from the array.
1095       * @param handler SAX ContentHandler object to receive the event.
1096       * @param edgeTreatmentFlags How leading/trailing spaces should be handled. 
1097       * This is a bitfield contining two flags, bitwise-ORed together:
1098       * <dl>
1099       * <dt>SUPPRESS_LEADING_WS</dt>
1100       * <dd>When false, causes leading whitespace to be converted to a single
1101       * space; when true, causes it to be discarded entirely.
1102       * Should be set TRUE for the first chunk, and (in multi-chunk output)
1103       * whenever the previous chunk ended in retained whitespace.</dd>
1104       * <dt>SUPPRESS_TRAILING_WS</dt>
1105       * <dd>When false, causes trailing whitespace to be converted to a single
1106       * space; when true, causes it to be discarded entirely.
1107       * Should be set TRUE for the last or only chunk.
1108       * </dd>
1109       * </dl>
1110       * @return normalization status, as in the edgeTreatmentFlags parameter:
1111       * <dl>
1112       * <dt>0</dt>
1113       * <dd>if this output did not end in retained whitespace, and thus whitespace
1114       * at the start of the following chunk (if any) should be converted to a
1115       * single space.
1116       * <dt>SUPPRESS_LEADING_WS</dt>
1117       * <dd>if this output ended in retained whitespace, and thus whitespace
1118       * at the start of the following chunk (if any) should be completely
1119       * suppressed.</dd>
1120       * </dd>
1121       * </dl>
1122       *
1123       * 
1124       * @exception org.xml.sax.SAXException Any SAX exception, possibly
1125       *            wrapping another exception.
1126       */
1127      static int sendNormalizedSAXcharacters(char ch[], 
1128                 int start, int length, 
1129                 org.xml.sax.ContentHandler handler,
1130                                                     int edgeTreatmentFlags)
1131              throws org.xml.sax.SAXException
1132      {
1133         boolean processingLeadingWhitespace =
1134                           ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1135         boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1136         int currPos = start;
1137         int limit = start+length;
1138    
1139         // Strip any leading spaces first, if required
1140         if (processingLeadingWhitespace) {
1141             for (; currPos < limit
1142                    && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1143                  currPos++) { }
1144    
1145             // If we've only encountered leading spaces, the
1146             // current state remains unchanged
1147             if (currPos == limit) {
1148                 return edgeTreatmentFlags;
1149             }
1150         }
1151    
1152         // If we get here, there are no more leading spaces to strip
1153         while (currPos < limit) {
1154             int startNonWhitespace = currPos;
1155    
1156             // Grab a chunk of non-whitespace characters
1157             for (; currPos < limit
1158                    && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1159                  currPos++) { }
1160    
1161             // Non-whitespace seen - emit them, along with a single
1162             // space for any preceding whitespace characters
1163             if (startNonWhitespace != currPos) {
1164                 if (seenWhitespace) {
1165                     handler.characters(SINGLE_SPACE, 0, 1);
1166                     seenWhitespace = false;
1167                 }
1168                 handler.characters(ch, startNonWhitespace,
1169                                    currPos - startNonWhitespace);
1170             }
1171    
1172             int startWhitespace = currPos;
1173    
1174             // Consume any whitespace characters
1175             for (; currPos < limit
1176                    && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1177                  currPos++) { }
1178    
1179             if (startWhitespace != currPos) {
1180                 seenWhitespace = true;
1181             }
1182         }
1183    
1184         return (seenWhitespace ? CARRY_WS : 0)
1185                | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1186      }
1187    
1188      /**
1189       * Directly normalize and dispatch the character array.
1190       *
1191       * @param ch The characters from the XML document.
1192       * @param start The start position in the array.
1193       * @param length The number of characters to read from the array.
1194       * @param handler SAX ContentHandler object to receive the event.
1195       * @exception org.xml.sax.SAXException Any SAX exception, possibly
1196       *            wrapping another exception.
1197       */
1198      public static void sendNormalizedSAXcharacters(char ch[], 
1199                 int start, int length, 
1200                 org.xml.sax.ContentHandler handler)
1201              throws org.xml.sax.SAXException
1202      {
1203                    sendNormalizedSAXcharacters(ch, start, length, 
1204                 handler, SUPPRESS_BOTH);
1205            }
1206                    
1207            /**
1208       * Sends the specified range of characters as sax Comment.
1209       * <p>
1210       * Note that, unlike sendSAXcharacters, this has to be done as a single 
1211       * call to LexicalHandler#comment.
1212       *
1213       * @param ch SAX LexicalHandler object to receive the event.
1214       * @param start Offset of first character in the range.
1215       * @param length Number of characters to send.
1216       * @exception org.xml.sax.SAXException may be thrown by handler's
1217       * characters() method.
1218       */
1219      public void sendSAXComment(
1220              org.xml.sax.ext.LexicalHandler ch, int start, int length)
1221                throws org.xml.sax.SAXException
1222      {
1223    
1224        // %OPT% Do it this way for now...
1225        String comment = getString(start, length);
1226        ch.comment(comment.toCharArray(), 0, length);
1227      }
1228    
1229      /**
1230       * Copies characters from this string into the destination character
1231       * array.
1232       *
1233       * @param      srcBegin   index of the first character in the string
1234       *                        to copy.
1235       * @param      srcEnd     index after the last character in the string
1236       *                        to copy.
1237       * @param      dst        the destination array.
1238       * @param      dstBegin   the start offset in the destination array.
1239       * @exception IndexOutOfBoundsException If any of the following
1240       *            is true:
1241       *            <ul><li><code>srcBegin</code> is negative.
1242       *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1243       *            <li><code>srcEnd</code> is greater than the length of this
1244       *                string
1245       *            <li><code>dstBegin</code> is negative
1246       *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1247       *                <code>dst.length</code></ul>
1248       * @exception NullPointerException if <code>dst</code> is <code>null</code>
1249       */
1250      private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1251      {
1252        // %TBD% Joe needs to write this function.  Make public when implemented.
1253      }
1254    
1255      /**
1256       * Encapsulation c'tor. After this is called, the source FastStringBuffer
1257       * will be reset to use the new object as its m_innerFSB, and will have
1258       * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1259       * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1260       *
1261       * NEEDSDOC @param source
1262       */
1263      private FastStringBuffer(FastStringBuffer source)
1264      {
1265    
1266        // Copy existing information into new encapsulation
1267        m_chunkBits = source.m_chunkBits;
1268        m_maxChunkBits = source.m_maxChunkBits;
1269        m_rebundleBits = source.m_rebundleBits;
1270        m_chunkSize = source.m_chunkSize;
1271        m_chunkMask = source.m_chunkMask;
1272        m_array = source.m_array;
1273        m_innerFSB = source.m_innerFSB;
1274    
1275        // These have to be adjusted because we're calling just at the time
1276        // when we would be about to allocate another chunk
1277        m_lastChunk = source.m_lastChunk - 1;
1278        m_firstFree = source.m_chunkSize;
1279    
1280        // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1281        source.m_array = new char[16][];
1282        source.m_innerFSB = this;
1283    
1284        // Since we encapsulated just as we were about to append another
1285        // chunk, return ready to create the chunk after the innerFSB
1286        // -- 1, not 0.
1287        source.m_lastChunk = 1;
1288        source.m_firstFree = 0;
1289        source.m_chunkBits += m_rebundleBits;
1290        source.m_chunkSize = 1 << (source.m_chunkBits);
1291        source.m_chunkMask = source.m_chunkSize - 1;
1292      }
1293    }