1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: FastStringBuffer.java 469279 2006-10-30 21:18:02Z minchau $
20 */
21package org.apache.xml.utils;
22
23/**
24 * Bare-bones, unsafe, fast string buffer. No thread-safety, no
25 * parameter range checking, exposed fields. Note that in typical
26 * applications, thread-safety of a StringBuffer is a somewhat
27 * dubious concept in any case.
28 * <p>
29 * Note that Stree and DTM used a single FastStringBuffer as a string pool,
30 * by recording start and length indices within this single buffer. This
31 * minimizes heap overhead, but of course requires more work when retrieving
32 * the data.
33 * <p>
34 * FastStringBuffer operates as a "chunked buffer". Doing so
35 * reduces the need to recopy existing information when an append
36 * exceeds the space available; we just allocate another chunk and
37 * flow across to it. (The array of chunks may need to grow,
38 * admittedly, but that's a much smaller object.) Some excess
39 * recopying may arise when we extract Strings which cross chunk
40 * boundaries; larger chunks make that less frequent.
41 * <p>
42 * The size values are parameterized, to allow tuning this code. In
43 * theory, Result Tree Fragments might want to be tuned differently
44 * from the main document's text.
45 * <p>
46 * %REVIEW% An experiment in self-tuning is
47 * included in the code (using nested FastStringBuffers to achieve
48 * variation in chunk sizes), but this implementation has proven to
49 * be problematic when data may be being copied from the FSB into itself.
50 * We should either re-architect that to make this safe (if possible)
51 * or remove that code and clean up for performance/maintainability reasons.
52 * <p>
53 */
54public class FastStringBuffer
55{
56  // If nonzero, forces the inial chunk size.
57  /**/static final int DEBUG_FORCE_INIT_BITS=0;
58
59  	// %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
60  	// back into the same FSB (variable set from previous variable, for example)
61  	// and blocksize changes in mid-copy... there's risk of severe malfunction in
62  	// the read process, due to how the resizing code re-jiggers storage. Arggh.
63  	// If we want to retain the variable-size-block feature, we need to reconsider
64  	// that issue. For now, I have forced us into fixed-size mode.
65    static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
66
67	/** Manifest constant: Suppress leading whitespace.
68	 * This should be used when normalize-to-SAX is called for the first chunk of a
69	 * multi-chunk output, or one following unsuppressed whitespace in a previous
70	 * chunk.
71	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
72	 */
73	public static final int SUPPRESS_LEADING_WS=0x01;
74
75	/** Manifest constant: Suppress trailing whitespace.
76	 * This should be used when normalize-to-SAX is called for the last chunk of a
77	 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
78	 */
79	public static final int SUPPRESS_TRAILING_WS=0x02;
80
81	/** Manifest constant: Suppress both leading and trailing whitespace.
82	 * This should be used when normalize-to-SAX is called for a complete string.
83	 * (I'm not wild about the name of this one. Ideas welcome.)
84	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
85	 */
86	public static final int SUPPRESS_BOTH
87		= SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
88
89	/** Manifest constant: Carry trailing whitespace of one chunk as leading
90	 * whitespace of the next chunk. Used internally; I don't see any reason
91	 * to make it public right now.
92	 */
93	private static final int CARRY_WS=0x04;
94
95	/**
96   * Field m_chunkBits sets our chunking strategy, by saying how many
97   * bits of index can be used within a single chunk before flowing over
98   * to the next chunk. For example, if m_chunkbits is set to 15, each
99   * chunk can contain up to 2^15 (32K) characters
100   */
101  int m_chunkBits = 15;
102
103  /**
104   * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
105   * the largest permissible chunk size is in this particular FastStringBuffer
106   * hierarchy.
107   */
108  int m_maxChunkBits = 15;
109
110  /**
111   * Field m_rechunkBits affects our chunk-growth strategy, by saying how
112   * many chunks should be allocated at one size before we encapsulate them
113   * into the first chunk of the next size up. For example, if m_rechunkBits
114   * is set to 3, then after 8 chunks at a given size we will rebundle
115   * them as the first element of a FastStringBuffer using a chunk size
116   * 8 times larger (chunkBits shifted left three bits).
117   */
118  int m_rebundleBits = 2;
119
120  /**
121   * Field m_chunkSize establishes the maximum size of one chunk of the array
122   * as 2**chunkbits characters.
123   * (Which may also be the minimum size if we aren't tuning for storage)
124   */
125  int m_chunkSize;  // =1<<(m_chunkBits-1);
126
127  /**
128   * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
129   * worth of low-order '1' bits, useful for shift-and-mask addressing
130   * within the chunks.
131   */
132  int m_chunkMask;  // =m_chunkSize-1;
133
134  /**
135   * Field m_array holds the string buffer's text contents, using an
136   * array-of-arrays. Note that this array, and the arrays it contains, may be
137   * reallocated when necessary in order to allow the buffer to grow;
138   * references to them should be considered to be invalidated after any
139   * append. However, the only time these arrays are directly exposed
140   * is in the sendSAXcharacters call.
141   */
142  char[][] m_array;
143
144  /**
145   * Field m_lastChunk is an index into m_array[], pointing to the last
146   * chunk of the Chunked Array currently in use. Note that additional
147   * chunks may actually be allocated, eg if the FastStringBuffer had
148   * previously been truncated or if someone issued an ensureSpace request.
149   * <p>
150   * The insertion point for append operations is addressed by the combination
151   * of m_lastChunk and m_firstFree.
152   */
153  int m_lastChunk = 0;
154
155  /**
156   * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
157   * the first character in the Chunked Array which is not part of the
158   * FastStringBuffer's current content. Since m_array[][] is zero-based,
159   * the length of that content can be calculated as
160   * (m_lastChunk<<m_chunkBits) + m_firstFree
161   */
162  int m_firstFree = 0;
163
164  /**
165   * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
166   * length equals m_chunkSize, and which replaces m_array[0]. This allows
167   * building a hierarchy of FastStringBuffers, where early appends use
168   * a smaller chunkSize (for less wasted memory overhead) but later
169   * ones use a larger chunkSize (for less heap activity overhead).
170   */
171  FastStringBuffer m_innerFSB = null;
172
173  /**
174   * Construct a FastStringBuffer, with allocation policy as per parameters.
175   * <p>
176   * For coding convenience, I've expressed both allocation sizes in terms of
177   * a number of bits. That's needed for the final size of a chunk,
178   * to permit fast and efficient shift-and-mask addressing. It's less critical
179   * for the inital size, and may be reconsidered.
180   * <p>
181   * An alternative would be to accept integer sizes and round to powers of two;
182   * that really doesn't seem to buy us much, if anything.
183   *
184   * @param initChunkBits Length in characters of the initial allocation
185   * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
186   * characters.) Later chunks will use larger allocation units, to trade off
187   * allocation speed of large document against storage efficiency of small
188   * ones.
189   * @param maxChunkBits Number of character-offset bits that should be used for
190   * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
191   * characters.
192   * @param rebundleBits Number of character-offset bits that addressing should
193   * advance before we attempt to take a step from initChunkBits to maxChunkBits
194   */
195  public FastStringBuffer(int initChunkBits, int maxChunkBits,
196                          int rebundleBits)
197  {
198    if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
199
200    // %REVIEW%
201    // Should this force to larger value, or smaller? Smaller less efficient, but if
202    // someone requested variable mode it's because they care about storage space.
203    // On the other hand, given the other changes I'm making, odds are that we should
204    // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
205    // anyway; we need a permanant solution.
206    //
207    if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
208    //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
209
210    m_array = new char[16][];
211
212    // Don't bite off more than we're prepared to swallow!
213    if (initChunkBits > maxChunkBits)
214      initChunkBits = maxChunkBits;
215
216    m_chunkBits = initChunkBits;
217    m_maxChunkBits = maxChunkBits;
218    m_rebundleBits = rebundleBits;
219    m_chunkSize = 1 << (initChunkBits);
220    m_chunkMask = m_chunkSize - 1;
221    m_array[0] = new char[m_chunkSize];
222  }
223
224  /**
225   * Construct a FastStringBuffer, using a default rebundleBits value.
226   *
227   * NEEDSDOC @param initChunkBits
228   * NEEDSDOC @param maxChunkBits
229   */
230  public FastStringBuffer(int initChunkBits, int maxChunkBits)
231  {
232    this(initChunkBits, maxChunkBits, 2);
233  }
234
235  /**
236   * Construct a FastStringBuffer, using default maxChunkBits and
237   * rebundleBits values.
238   * <p>
239   * ISSUE: Should this call assert initial size, or fixed size?
240   * Now configured as initial, with a default for fixed.
241   *
242   * NEEDSDOC @param initChunkBits
243   */
244  public FastStringBuffer(int initChunkBits)
245  {
246    this(initChunkBits, 15, 2);
247  }
248
249  /**
250   * Construct a FastStringBuffer, using a default allocation policy.
251   */
252  public FastStringBuffer()
253  {
254
255    // 10 bits is 1K. 15 bits is 32K. Remember that these are character
256    // counts, so actual memory allocation unit is doubled for UTF-16 chars.
257    //
258    // For reference: In the original FastStringBuffer, we simply
259    // overallocated by blocksize (default 1KB) on each buffer-growth.
260    this(10, 15, 2);
261  }
262
263  /**
264   * Get the length of the list. Synonym for length().
265   *
266   * @return the number of characters in the FastStringBuffer's content.
267   */
268  public final int size()
269  {
270    return (m_lastChunk << m_chunkBits) + m_firstFree;
271  }
272
273  /**
274   * Get the length of the list. Synonym for size().
275   *
276   * @return the number of characters in the FastStringBuffer's content.
277   */
278  public final int length()
279  {
280    return (m_lastChunk << m_chunkBits) + m_firstFree;
281  }
282
283  /**
284   * Discard the content of the FastStringBuffer, and most of the memory
285   * that was allocated by it, restoring the initial state. Note that this
286   * may eventually be different from setLength(0), which see.
287   */
288  public final void reset()
289  {
290
291    m_lastChunk = 0;
292    m_firstFree = 0;
293
294    // Recover the original chunk size
295    FastStringBuffer innermost = this;
296
297    while (innermost.m_innerFSB != null)
298    {
299      innermost = innermost.m_innerFSB;
300    }
301
302    m_chunkBits = innermost.m_chunkBits;
303    m_chunkSize = innermost.m_chunkSize;
304    m_chunkMask = innermost.m_chunkMask;
305
306    // Discard the hierarchy
307    m_innerFSB = null;
308    m_array = new char[16][0];
309    m_array[0] = new char[m_chunkSize];
310  }
311
312  /**
313   * Directly set how much of the FastStringBuffer's storage is to be
314   * considered part of its content. This is a fast but hazardous
315   * operation. It is not protected against negative values, or values
316   * greater than the amount of storage currently available... and even
317   * if additional storage does exist, its contents are unpredictable.
318   * The only safe use for our setLength() is to truncate the FastStringBuffer
319   * to a shorter string.
320   *
321   * @param l New length. If l<0 or l>=getLength(), this operation will
322   * not report an error but future operations will almost certainly fail.
323   */
324  public final void setLength(int l)
325  {
326    m_lastChunk = l >>> m_chunkBits;
327
328    if (m_lastChunk == 0 && m_innerFSB != null)
329    {
330      // Replace this FSB with the appropriate inner FSB, truncated
331      m_innerFSB.setLength(l, this);
332    }
333    else
334    {
335      m_firstFree = l & m_chunkMask;
336
337	  // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
338	  // us pointing at the start of a chunk which has not yet been allocated. Rather than
339	  // pay the cost of dealing with that in the append loops (more scattered and more
340	  // inner-loop), we correct it here by moving to the safe side of that
341	  // line -- as we would have left the indexes had we appended up to that point.
342      if(m_firstFree==0 && m_lastChunk>0)
343      {
344      	--m_lastChunk;
345      	m_firstFree=m_chunkSize;
346      }
347    }
348  }
349
350  /**
351   * Subroutine for the public setLength() method. Deals with the fact
352   * that truncation may require restoring one of the innerFSBs
353   *
354   * NEEDSDOC @param l
355   * NEEDSDOC @param rootFSB
356   */
357  private final void setLength(int l, FastStringBuffer rootFSB)
358  {
359
360    m_lastChunk = l >>> m_chunkBits;
361
362    if (m_lastChunk == 0 && m_innerFSB != null)
363    {
364      m_innerFSB.setLength(l, rootFSB);
365    }
366    else
367    {
368
369      // Undo encapsulation -- pop the innerFSB data back up to root.
370      // Inefficient, but attempts to keep the code simple.
371      rootFSB.m_chunkBits = m_chunkBits;
372      rootFSB.m_maxChunkBits = m_maxChunkBits;
373      rootFSB.m_rebundleBits = m_rebundleBits;
374      rootFSB.m_chunkSize = m_chunkSize;
375      rootFSB.m_chunkMask = m_chunkMask;
376      rootFSB.m_array = m_array;
377      rootFSB.m_innerFSB = m_innerFSB;
378      rootFSB.m_lastChunk = m_lastChunk;
379
380      // Finally, truncate this sucker.
381      rootFSB.m_firstFree = l & m_chunkMask;
382    }
383  }
384
385  /**
386   * Note that this operation has been somewhat deoptimized by the shift to a
387   * chunked array, as there is no factory method to produce a String object
388   * directly from an array of arrays and hence a double copy is needed.
389   * By using ensureCapacity we hope to minimize the heap overhead of building
390   * the intermediate StringBuffer.
391   * <p>
392   * (It really is a pity that Java didn't design String as a final subclass
393   * of MutableString, rather than having StringBuffer be a separate hierarchy.
394   * We'd avoid a <strong>lot</strong> of double-buffering.)
395   *
396   * @return the contents of the FastStringBuffer as a standard Java string.
397   */
398  public final String toString()
399  {
400
401    int length = (m_lastChunk << m_chunkBits) + m_firstFree;
402
403    return getString(new StringBuffer(length), 0, 0, length).toString();
404  }
405
406  /**
407   * Append a single character onto the FastStringBuffer, growing the
408   * storage if necessary.
409   * <p>
410   * NOTE THAT after calling append(), previously obtained
411   * references to m_array[][] may no longer be valid....
412   * though in fact they should be in this instance.
413   *
414   * @param value character to be appended.
415   */
416  public final void append(char value)
417  {
418
419    char[] chunk;
420
421    // We may have preallocated chunks. If so, all but last should
422    // be at full size.
423
424    if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
425      chunk = m_array[m_lastChunk];
426    else
427    {
428
429      // Extend array?
430      int i = m_array.length;
431
432      if (m_lastChunk + 1 == i)
433      {
434        char[][] newarray = new char[i + 16][];
435
436        System.arraycopy(m_array, 0, newarray, 0, i);
437
438        m_array = newarray;
439      }
440
441      // Advance one chunk
442      chunk = m_array[++m_lastChunk];
443
444      if (chunk == null)
445      {
446
447        // Hierarchical encapsulation
448        if (m_lastChunk == 1 << m_rebundleBits
449                && m_chunkBits < m_maxChunkBits)
450        {
451
452          // Should do all the work of both encapsulating
453          // existing data and establishing new sizes/offsets
454          m_innerFSB = new FastStringBuffer(this);
455        }
456
457        // Add a chunk.
458        chunk = m_array[m_lastChunk] = new char[m_chunkSize];
459      }
460
461      m_firstFree = 0;
462    }
463
464    // Space exists in the chunk. Append the character.
465    chunk[m_firstFree++] = value;
466  }
467
468  /**
469   * Append the contents of a String onto the FastStringBuffer,
470   * growing the storage if necessary.
471   * <p>
472   * NOTE THAT after calling append(), previously obtained
473   * references to m_array[] may no longer be valid.
474   *
475   * @param value String whose contents are to be appended.
476   */
477  public final void append(String value)
478  {
479
480    if (value == null)
481      return;
482    int strlen = value.length();
483
484    if (0 == strlen)
485      return;
486
487    int copyfrom = 0;
488    char[] chunk = m_array[m_lastChunk];
489    int available = m_chunkSize - m_firstFree;
490
491    // Repeat while data remains to be copied
492    while (strlen > 0)
493    {
494
495      // Copy what fits
496      if (available > strlen)
497        available = strlen;
498
499      value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
500                     m_firstFree);
501
502      strlen -= available;
503      copyfrom += available;
504
505      // If there's more left, allocate another chunk and continue
506      if (strlen > 0)
507      {
508
509        // Extend array?
510        int i = m_array.length;
511
512        if (m_lastChunk + 1 == i)
513        {
514          char[][] newarray = new char[i + 16][];
515
516          System.arraycopy(m_array, 0, newarray, 0, i);
517
518          m_array = newarray;
519        }
520
521        // Advance one chunk
522        chunk = m_array[++m_lastChunk];
523
524        if (chunk == null)
525        {
526
527          // Hierarchical encapsulation
528          if (m_lastChunk == 1 << m_rebundleBits
529                  && m_chunkBits < m_maxChunkBits)
530          {
531
532            // Should do all the work of both encapsulating
533            // existing data and establishing new sizes/offsets
534            m_innerFSB = new FastStringBuffer(this);
535          }
536
537          // Add a chunk.
538          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
539        }
540
541        available = m_chunkSize;
542        m_firstFree = 0;
543      }
544    }
545
546    // Adjust the insert point in the last chunk, when we've reached it.
547    m_firstFree += available;
548  }
549
550  /**
551   * Append the contents of a StringBuffer onto the FastStringBuffer,
552   * growing the storage if necessary.
553   * <p>
554   * NOTE THAT after calling append(), previously obtained
555   * references to m_array[] may no longer be valid.
556   *
557   * @param value StringBuffer whose contents are to be appended.
558   */
559  public final void append(StringBuffer value)
560  {
561
562    if (value == null)
563      return;
564    int strlen = value.length();
565
566    if (0 == strlen)
567      return;
568
569    int copyfrom = 0;
570    char[] chunk = m_array[m_lastChunk];
571    int available = m_chunkSize - m_firstFree;
572
573    // Repeat while data remains to be copied
574    while (strlen > 0)
575    {
576
577      // Copy what fits
578      if (available > strlen)
579        available = strlen;
580
581      value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
582                     m_firstFree);
583
584      strlen -= available;
585      copyfrom += available;
586
587      // If there's more left, allocate another chunk and continue
588      if (strlen > 0)
589      {
590
591        // Extend array?
592        int i = m_array.length;
593
594        if (m_lastChunk + 1 == i)
595        {
596          char[][] newarray = new char[i + 16][];
597
598          System.arraycopy(m_array, 0, newarray, 0, i);
599
600          m_array = newarray;
601        }
602
603        // Advance one chunk
604        chunk = m_array[++m_lastChunk];
605
606        if (chunk == null)
607        {
608
609          // Hierarchical encapsulation
610          if (m_lastChunk == 1 << m_rebundleBits
611                  && m_chunkBits < m_maxChunkBits)
612          {
613
614            // Should do all the work of both encapsulating
615            // existing data and establishing new sizes/offsets
616            m_innerFSB = new FastStringBuffer(this);
617          }
618
619          // Add a chunk.
620          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
621        }
622
623        available = m_chunkSize;
624        m_firstFree = 0;
625      }
626    }
627
628    // Adjust the insert point in the last chunk, when we've reached it.
629    m_firstFree += available;
630  }
631
632  /**
633   * Append part of the contents of a Character Array onto the
634   * FastStringBuffer,  growing the storage if necessary.
635   * <p>
636   * NOTE THAT after calling append(), previously obtained
637   * references to m_array[] may no longer be valid.
638   *
639   * @param chars character array from which data is to be copied
640   * @param start offset in chars of first character to be copied,
641   * zero-based.
642   * @param length number of characters to be copied
643   */
644  public final void append(char[] chars, int start, int length)
645  {
646
647    int strlen = length;
648
649    if (0 == strlen)
650      return;
651
652    int copyfrom = start;
653    char[] chunk = m_array[m_lastChunk];
654    int available = m_chunkSize - m_firstFree;
655
656    // Repeat while data remains to be copied
657    while (strlen > 0)
658    {
659
660      // Copy what fits
661      if (available > strlen)
662        available = strlen;
663
664      System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
665                       available);
666
667      strlen -= available;
668      copyfrom += available;
669
670      // If there's more left, allocate another chunk and continue
671      if (strlen > 0)
672      {
673
674        // Extend array?
675        int i = m_array.length;
676
677        if (m_lastChunk + 1 == i)
678        {
679          char[][] newarray = new char[i + 16][];
680
681          System.arraycopy(m_array, 0, newarray, 0, i);
682
683          m_array = newarray;
684        }
685
686        // Advance one chunk
687        chunk = m_array[++m_lastChunk];
688
689        if (chunk == null)
690        {
691
692          // Hierarchical encapsulation
693          if (m_lastChunk == 1 << m_rebundleBits
694                  && m_chunkBits < m_maxChunkBits)
695          {
696
697            // Should do all the work of both encapsulating
698            // existing data and establishing new sizes/offsets
699            m_innerFSB = new FastStringBuffer(this);
700          }
701
702          // Add a chunk.
703          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
704        }
705
706        available = m_chunkSize;
707        m_firstFree = 0;
708      }
709    }
710
711    // Adjust the insert point in the last chunk, when we've reached it.
712    m_firstFree += available;
713  }
714
715  /**
716   * Append the contents of another FastStringBuffer onto
717   * this FastStringBuffer, growing the storage if necessary.
718   * <p>
719   * NOTE THAT after calling append(), previously obtained
720   * references to m_array[] may no longer be valid.
721   *
722   * @param value FastStringBuffer whose contents are
723   * to be appended.
724   */
725  public final void append(FastStringBuffer value)
726  {
727
728    // Complicating factor here is that the two buffers may use
729    // different chunk sizes, and even if they're the same we're
730    // probably on a different alignment due to previously appended
731    // data. We have to work through the source in bite-sized chunks.
732    if (value == null)
733      return;
734    int strlen = value.length();
735
736    if (0 == strlen)
737      return;
738
739    int copyfrom = 0;
740    char[] chunk = m_array[m_lastChunk];
741    int available = m_chunkSize - m_firstFree;
742
743    // Repeat while data remains to be copied
744    while (strlen > 0)
745    {
746
747      // Copy what fits
748      if (available > strlen)
749        available = strlen;
750
751      int sourcechunk = (copyfrom + value.m_chunkSize - 1)
752                        >>> value.m_chunkBits;
753      int sourcecolumn = copyfrom & value.m_chunkMask;
754      int runlength = value.m_chunkSize - sourcecolumn;
755
756      if (runlength > available)
757        runlength = available;
758
759      System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
760                       m_array[m_lastChunk], m_firstFree, runlength);
761
762      if (runlength != available)
763        System.arraycopy(value.m_array[sourcechunk + 1], 0,
764                         m_array[m_lastChunk], m_firstFree + runlength,
765                         available - runlength);
766
767      strlen -= available;
768      copyfrom += available;
769
770      // If there's more left, allocate another chunk and continue
771      if (strlen > 0)
772      {
773
774        // Extend array?
775        int i = m_array.length;
776
777        if (m_lastChunk + 1 == i)
778        {
779          char[][] newarray = new char[i + 16][];
780
781          System.arraycopy(m_array, 0, newarray, 0, i);
782
783          m_array = newarray;
784        }
785
786        // Advance one chunk
787        chunk = m_array[++m_lastChunk];
788
789        if (chunk == null)
790        {
791
792          // Hierarchical encapsulation
793          if (m_lastChunk == 1 << m_rebundleBits
794                  && m_chunkBits < m_maxChunkBits)
795          {
796
797            // Should do all the work of both encapsulating
798            // existing data and establishing new sizes/offsets
799            m_innerFSB = new FastStringBuffer(this);
800          }
801
802          // Add a chunk.
803          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
804        }
805
806        available = m_chunkSize;
807        m_firstFree = 0;
808      }
809    }
810
811    // Adjust the insert point in the last chunk, when we've reached it.
812    m_firstFree += available;
813  }
814
815  /**
816   * @return true if the specified range of characters are all whitespace,
817   * as defined by XMLCharacterRecognizer.
818   * <p>
819   * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
820   *
821   * @param start Offset of first character in the range.
822   * @param length Number of characters to send.
823   */
824  public boolean isWhitespace(int start, int length)
825  {
826
827    int sourcechunk = start >>> m_chunkBits;
828    int sourcecolumn = start & m_chunkMask;
829    int available = m_chunkSize - sourcecolumn;
830    boolean chunkOK;
831
832    while (length > 0)
833    {
834      int runlength = (length <= available) ? length : available;
835
836      if (sourcechunk == 0 && m_innerFSB != null)
837        chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
838      else
839        chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
840          m_array[sourcechunk], sourcecolumn, runlength);
841
842      if (!chunkOK)
843        return false;
844
845      length -= runlength;
846
847      ++sourcechunk;
848
849      sourcecolumn = 0;
850      available = m_chunkSize;
851    }
852
853    return true;
854  }
855
856  /**
857   * @param start Offset of first character in the range.
858   * @param length Number of characters to send.
859   * @return a new String object initialized from the specified range of
860   * characters.
861   */
862  public String getString(int start, int length)
863  {
864    int startColumn = start & m_chunkMask;
865    int startChunk = start >>> m_chunkBits;
866    if (startColumn + length < m_chunkMask && m_innerFSB == null) {
867      return getOneChunkString(startChunk, startColumn, length);
868    }
869    return getString(new StringBuffer(length), startChunk, startColumn,
870                     length).toString();
871  }
872
873  protected String getOneChunkString(int startChunk, int startColumn,
874                                     int length) {
875    return new String(m_array[startChunk], startColumn, length);
876  }
877
878  /**
879   * @param sb StringBuffer to be appended to
880   * @param start Offset of first character in the range.
881   * @param length Number of characters to send.
882   * @return sb with the requested text appended to it
883   */
884  StringBuffer getString(StringBuffer sb, int start, int length)
885  {
886    return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
887  }
888
889  /**
890   * Internal support for toString() and getString().
891   * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
892   * and returns a StringBuffer supplied by the caller. This simplifies
893   * m_innerFSB support.
894   * <p>
895   * Note that this operation has been somewhat deoptimized by the shift to a
896   * chunked array, as there is no factory method to produce a String object
897   * directly from an array of arrays and hence a double copy is needed.
898   * By presetting length we hope to minimize the heap overhead of building
899   * the intermediate StringBuffer.
900   * <p>
901   * (It really is a pity that Java didn't design String as a final subclass
902   * of MutableString, rather than having StringBuffer be a separate hierarchy.
903   * We'd avoid a <strong>lot</strong> of double-buffering.)
904   *
905   *
906   * @param sb
907   * @param startChunk
908   * @param startColumn
909   * @param length
910   *
911   * @return the contents of the FastStringBuffer as a standard Java string.
912   */
913  StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
914                         int length)
915  {
916
917    int stop = (startChunk << m_chunkBits) + startColumn + length;
918    int stopChunk = stop >>> m_chunkBits;
919    int stopColumn = stop & m_chunkMask;
920
921    // Factored out
922    //StringBuffer sb=new StringBuffer(length);
923    for (int i = startChunk; i < stopChunk; ++i)
924    {
925      if (i == 0 && m_innerFSB != null)
926        m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
927      else
928        sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
929
930      startColumn = 0;  // after first chunk
931    }
932
933    if (stopChunk == 0 && m_innerFSB != null)
934      m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
935    else if (stopColumn > startColumn)
936      sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
937
938    return sb;
939  }
940
941  /**
942   * Get a single character from the string buffer.
943   *
944   *
945   * @param pos character position requested.
946   * @return A character from the requested position.
947   */
948  public char charAt(int pos)
949  {
950    int startChunk = pos >>> m_chunkBits;
951
952    if (startChunk == 0 && m_innerFSB != null)
953      return m_innerFSB.charAt(pos & m_chunkMask);
954    else
955      return m_array[startChunk][pos & m_chunkMask];
956  }
957
958  /**
959   * Sends the specified range of characters as one or more SAX characters()
960   * events.
961   * Note that the buffer reference passed to the ContentHandler may be
962   * invalidated if the FastStringBuffer is edited; it's the user's
963   * responsibility to manage access to the FastStringBuffer to prevent this
964   * problem from arising.
965   * <p>
966   * Note too that there is no promise that the output will be sent as a
967   * single call. As is always true in SAX, one logical string may be split
968   * across multiple blocks of memory and hence delivered as several
969   * successive events.
970   *
971   * @param ch SAX ContentHandler object to receive the event.
972   * @param start Offset of first character in the range.
973   * @param length Number of characters to send.
974   * @exception org.xml.sax.SAXException may be thrown by handler's
975   * characters() method.
976   */
977  public void sendSAXcharacters(
978          org.xml.sax.ContentHandler ch, int start, int length)
979            throws org.xml.sax.SAXException
980  {
981
982    int startChunk = start >>> m_chunkBits;
983    int startColumn = start & m_chunkMask;
984    if (startColumn + length < m_chunkMask && m_innerFSB == null) {
985        ch.characters(m_array[startChunk], startColumn, length);
986        return;
987    }
988
989    int stop = start + length;
990    int stopChunk = stop >>> m_chunkBits;
991    int stopColumn = stop & m_chunkMask;
992
993    for (int i = startChunk; i < stopChunk; ++i)
994    {
995      if (i == 0 && m_innerFSB != null)
996        m_innerFSB.sendSAXcharacters(ch, startColumn,
997                                     m_chunkSize - startColumn);
998      else
999        ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1000
1001      startColumn = 0;  // after first chunk
1002    }
1003
1004    // Last, or only, chunk
1005    if (stopChunk == 0 && m_innerFSB != null)
1006      m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1007    else if (stopColumn > startColumn)
1008    {
1009      ch.characters(m_array[stopChunk], startColumn,
1010                    stopColumn - startColumn);
1011    }
1012  }
1013
1014  /**
1015   * Sends the specified range of characters as one or more SAX characters()
1016   * events, normalizing the characters according to XSLT rules.
1017   *
1018   * @param ch SAX ContentHandler object to receive the event.
1019   * @param start Offset of first character in the range.
1020   * @param length Number of characters to send.
1021   * @return normalization status to apply to next chunk (because we may
1022   * have been called recursively to process an inner FSB):
1023   * <dl>
1024   * <dt>0</dt>
1025   * <dd>if this output did not end in retained whitespace, and thus whitespace
1026   * at the start of the following chunk (if any) should be converted to a
1027   * single space.
1028   * <dt>SUPPRESS_LEADING_WS</dt>
1029   * <dd>if this output ended in retained whitespace, and thus whitespace
1030   * at the start of the following chunk (if any) should be completely
1031   * suppressed.</dd>
1032   * </dd>
1033   * </dl>
1034   * @exception org.xml.sax.SAXException may be thrown by handler's
1035   * characters() method.
1036   */
1037  public int sendNormalizedSAXcharacters(
1038          org.xml.sax.ContentHandler ch, int start, int length)
1039            throws org.xml.sax.SAXException
1040  {
1041	// This call always starts at the beginning of the
1042    // string being written out, either because it was called directly or
1043    // because it was an m_innerFSB recursion. This is important since
1044	// it gives us a well-known initial state for this flag:
1045	int stateForNextChunk=SUPPRESS_LEADING_WS;
1046
1047    int stop = start + length;
1048    int startChunk = start >>> m_chunkBits;
1049    int startColumn = start & m_chunkMask;
1050    int stopChunk = stop >>> m_chunkBits;
1051    int stopColumn = stop & m_chunkMask;
1052
1053    for (int i = startChunk; i < stopChunk; ++i)
1054    {
1055      if (i == 0 && m_innerFSB != null)
1056				stateForNextChunk=
1057        m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1058                                     m_chunkSize - startColumn);
1059      else
1060				stateForNextChunk=
1061        sendNormalizedSAXcharacters(m_array[i], startColumn,
1062                                    m_chunkSize - startColumn,
1063																		ch,stateForNextChunk);
1064
1065      startColumn = 0;  // after first chunk
1066    }
1067
1068    // Last, or only, chunk
1069    if (stopChunk == 0 && m_innerFSB != null)
1070			stateForNextChunk= // %REVIEW% Is this update really needed?
1071      m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1072    else if (stopColumn > startColumn)
1073    {
1074			stateForNextChunk= // %REVIEW% Is this update really needed?
1075      sendNormalizedSAXcharacters(m_array[stopChunk],
1076																	startColumn, stopColumn - startColumn,
1077																	ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1078    }
1079		return stateForNextChunk;
1080  }
1081
1082  static final char[] SINGLE_SPACE = {' '};
1083
1084  /**
1085   * Internal method to directly normalize and dispatch the character array.
1086   * This version is aware of the fact that it may be called several times
1087   * in succession if the data is made up of multiple "chunks", and thus
1088   * must actively manage the handling of leading and trailing whitespace.
1089   *
1090   * Note: The recursion is due to the possible recursion of inner FSBs.
1091   *
1092   * @param ch The characters from the XML document.
1093   * @param start The start position in the array.
1094   * @param length The number of characters to read from the array.
1095   * @param handler SAX ContentHandler object to receive the event.
1096   * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1097   * This is a bitfield contining two flags, bitwise-ORed together:
1098   * <dl>
1099   * <dt>SUPPRESS_LEADING_WS</dt>
1100   * <dd>When false, causes leading whitespace to be converted to a single
1101   * space; when true, causes it to be discarded entirely.
1102   * Should be set TRUE for the first chunk, and (in multi-chunk output)
1103   * whenever the previous chunk ended in retained whitespace.</dd>
1104   * <dt>SUPPRESS_TRAILING_WS</dt>
1105   * <dd>When false, causes trailing whitespace to be converted to a single
1106   * space; when true, causes it to be discarded entirely.
1107   * Should be set TRUE for the last or only chunk.
1108   * </dd>
1109   * </dl>
1110   * @return normalization status, as in the edgeTreatmentFlags parameter:
1111   * <dl>
1112   * <dt>0</dt>
1113   * <dd>if this output did not end in retained whitespace, and thus whitespace
1114   * at the start of the following chunk (if any) should be converted to a
1115   * single space.
1116   * <dt>SUPPRESS_LEADING_WS</dt>
1117   * <dd>if this output ended in retained whitespace, and thus whitespace
1118   * at the start of the following chunk (if any) should be completely
1119   * suppressed.</dd>
1120   * </dd>
1121   * </dl>
1122   *
1123   *
1124   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1125   *            wrapping another exception.
1126   */
1127  static int sendNormalizedSAXcharacters(char ch[],
1128             int start, int length,
1129             org.xml.sax.ContentHandler handler,
1130						 int edgeTreatmentFlags)
1131          throws org.xml.sax.SAXException
1132  {
1133     boolean processingLeadingWhitespace =
1134                       ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1135     boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1136     int currPos = start;
1137     int limit = start+length;
1138
1139     // Strip any leading spaces first, if required
1140     if (processingLeadingWhitespace) {
1141         for (; currPos < limit
1142                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1143              currPos++) { }
1144
1145         // If we've only encountered leading spaces, the
1146         // current state remains unchanged
1147         if (currPos == limit) {
1148             return edgeTreatmentFlags;
1149         }
1150     }
1151
1152     // If we get here, there are no more leading spaces to strip
1153     while (currPos < limit) {
1154         int startNonWhitespace = currPos;
1155
1156         // Grab a chunk of non-whitespace characters
1157         for (; currPos < limit
1158                && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1159              currPos++) { }
1160
1161         // Non-whitespace seen - emit them, along with a single
1162         // space for any preceding whitespace characters
1163         if (startNonWhitespace != currPos) {
1164             if (seenWhitespace) {
1165                 handler.characters(SINGLE_SPACE, 0, 1);
1166                 seenWhitespace = false;
1167             }
1168             handler.characters(ch, startNonWhitespace,
1169                                currPos - startNonWhitespace);
1170         }
1171
1172         int startWhitespace = currPos;
1173
1174         // Consume any whitespace characters
1175         for (; currPos < limit
1176                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1177              currPos++) { }
1178
1179         if (startWhitespace != currPos) {
1180             seenWhitespace = true;
1181         }
1182     }
1183
1184     return (seenWhitespace ? CARRY_WS : 0)
1185            | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1186  }
1187
1188  /**
1189   * Directly normalize and dispatch the character array.
1190   *
1191   * @param ch The characters from the XML document.
1192   * @param start The start position in the array.
1193   * @param length The number of characters to read from the array.
1194   * @param handler SAX ContentHandler object to receive the event.
1195   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1196   *            wrapping another exception.
1197   */
1198  public static void sendNormalizedSAXcharacters(char ch[],
1199             int start, int length,
1200             org.xml.sax.ContentHandler handler)
1201          throws org.xml.sax.SAXException
1202  {
1203		sendNormalizedSAXcharacters(ch, start, length,
1204             handler, SUPPRESS_BOTH);
1205	}
1206
1207	/**
1208   * Sends the specified range of characters as sax Comment.
1209   * <p>
1210   * Note that, unlike sendSAXcharacters, this has to be done as a single
1211   * call to LexicalHandler#comment.
1212   *
1213   * @param ch SAX LexicalHandler object to receive the event.
1214   * @param start Offset of first character in the range.
1215   * @param length Number of characters to send.
1216   * @exception org.xml.sax.SAXException may be thrown by handler's
1217   * characters() method.
1218   */
1219  public void sendSAXComment(
1220          org.xml.sax.ext.LexicalHandler ch, int start, int length)
1221            throws org.xml.sax.SAXException
1222  {
1223
1224    // %OPT% Do it this way for now...
1225    String comment = getString(start, length);
1226    ch.comment(comment.toCharArray(), 0, length);
1227  }
1228
1229  /**
1230   * Copies characters from this string into the destination character
1231   * array.
1232   *
1233   * @param      srcBegin   index of the first character in the string
1234   *                        to copy.
1235   * @param      srcEnd     index after the last character in the string
1236   *                        to copy.
1237   * @param      dst        the destination array.
1238   * @param      dstBegin   the start offset in the destination array.
1239   * @exception IndexOutOfBoundsException If any of the following
1240   *            is true:
1241   *            <ul><li><code>srcBegin</code> is negative.
1242   *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1243   *            <li><code>srcEnd</code> is greater than the length of this
1244   *                string
1245   *            <li><code>dstBegin</code> is negative
1246   *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1247   *                <code>dst.length</code></ul>
1248   * @exception NullPointerException if <code>dst</code> is <code>null</code>
1249   */
1250  private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1251  {
1252    // %TBD% Joe needs to write this function.  Make public when implemented.
1253  }
1254
1255  /**
1256   * Encapsulation c'tor. After this is called, the source FastStringBuffer
1257   * will be reset to use the new object as its m_innerFSB, and will have
1258   * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1259   * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1260   *
1261   * NEEDSDOC @param source
1262   */
1263  private FastStringBuffer(FastStringBuffer source)
1264  {
1265
1266    // Copy existing information into new encapsulation
1267    m_chunkBits = source.m_chunkBits;
1268    m_maxChunkBits = source.m_maxChunkBits;
1269    m_rebundleBits = source.m_rebundleBits;
1270    m_chunkSize = source.m_chunkSize;
1271    m_chunkMask = source.m_chunkMask;
1272    m_array = source.m_array;
1273    m_innerFSB = source.m_innerFSB;
1274
1275    // These have to be adjusted because we're calling just at the time
1276    // when we would be about to allocate another chunk
1277    m_lastChunk = source.m_lastChunk - 1;
1278    m_firstFree = source.m_chunkSize;
1279
1280    // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1281    source.m_array = new char[16][];
1282    source.m_innerFSB = this;
1283
1284    // Since we encapsulated just as we were about to append another
1285    // chunk, return ready to create the chunk after the innerFSB
1286    // -- 1, not 0.
1287    source.m_lastChunk = 1;
1288    source.m_firstFree = 0;
1289    source.m_chunkBits += m_rebundleBits;
1290    source.m_chunkSize = 1 << (source.m_chunkBits);
1291    source.m_chunkMask = source.m_chunkSize - 1;
1292  }
1293}
1294