1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31package com.google.protobuf;
32
33import java.io.IOException;
34import java.io.InputStream;
35import java.io.OutputStream;
36import java.io.UnsupportedEncodingException;
37import java.io.ByteArrayInputStream;
38import java.nio.ByteBuffer;
39import java.util.ArrayList;
40import java.util.Arrays;
41import java.util.Iterator;
42import java.util.List;
43import java.util.NoSuchElementException;
44import java.util.Stack;
45
46/**
47 * Class to represent {@code ByteStrings} formed by concatenation of other
48 * ByteStrings, without copying the data in the pieces. The concatenation is
49 * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
50 *
51 * <p>Most of the operation here is inspired by the now-famous paper <a
52 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
53 * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and
54 * michael plass
55 *
56 * <p>The algorithms described in the paper have been implemented for character
57 * strings in {@link com.google.common.string.Rope} and in the c++ class {@code
58 * cord.cc}.
59 *
60 * <p>Fundamentally the Rope algorithm represents the collection of pieces as a
61 * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum
62 * sequence length, sequences that are too short relative to their depth cause a
63 * tree rebalance.  More precisely, a tree of depth d is "balanced" in the
64 * terminology of BAP95 if its length is at least F(d+2), where F(n) is the
65 * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum
66 * lengths 1, 2, 3, 5, 8, 13,...
67 *
68 * @author carlanton@google.com (Carl Haverl)
69 */
70class RopeByteString extends ByteString {
71
72  /**
73   * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of
74   * depth n is "balanced", i.e flat enough, if its length is at least Fn+2,
75   * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at
76   * least 2, of depth 4 must have length >= 8, etc.
77   *
78   * <p>There's nothing special about using the Fibonacci numbers for this, but
79   * they are a reasonable sequence for encapsulating the idea that we are OK
80   * with longer strings being encoded in deeper binary trees.
81   *
82   * <p>For 32-bit integers, this array has length 46.
83   */
84  private static final int[] minLengthByDepth;
85
86  static {
87    // Dynamically generate the list of Fibonacci numbers the first time this
88    // class is accessed.
89    List<Integer> numbers = new ArrayList<Integer>();
90
91    // we skip the first Fibonacci number (1).  So instead of: 1 1 2 3 5 8 ...
92    // we have: 1 2 3 5 8 ...
93    int f1 = 1;
94    int f2 = 1;
95
96    // get all the values until we roll over.
97    while (f2 > 0) {
98      numbers.add(f2);
99      int temp = f1 + f2;
100      f1 = f2;
101      f2 = temp;
102    }
103
104    // we include this here so that we can index this array to [x + 1] in the
105    // loops below.
106    numbers.add(Integer.MAX_VALUE);
107    minLengthByDepth = new int[numbers.size()];
108    for (int i = 0; i < minLengthByDepth.length; i++) {
109      // unbox all the values
110      minLengthByDepth[i] = numbers.get(i);
111    }
112  }
113
114  private final int totalLength;
115  private final ByteString left;
116  private final ByteString right;
117  private final int leftLength;
118  private final int treeDepth;
119
120  /**
121   * Create a new RopeByteString, which can be thought of as a new tree node, by
122   * recording references to the two given strings.
123   *
124   * @param left  string on the left of this node, should have {@code size() >
125   *              0}
126   * @param right string on the right of this node, should have {@code size() >
127   *              0}
128   */
129  private RopeByteString(ByteString left, ByteString right) {
130    this.left = left;
131    this.right = right;
132    leftLength = left.size();
133    totalLength = leftLength + right.size();
134    treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
135  }
136
137  /**
138   * Concatenate the given strings while performing various optimizations to
139   * slow the growth rate of tree depth and tree node count. The result is
140   * either a {@link LiteralByteString} or a {@link RopeByteString}
141   * depending on which optimizations, if any, were applied.
142   *
143   * <p>Small pieces of length less than {@link
144   * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
145   * BAP95.  Large pieces are referenced without copy.
146   *
147   * @param left  string on the left
148   * @param right string on the right
149   * @return concatenation representing the same sequence as the given strings
150   */
151  static ByteString concatenate(ByteString left, ByteString right) {
152    ByteString result;
153    RopeByteString leftRope =
154        (left instanceof RopeByteString) ? (RopeByteString) left : null;
155    if (right.size() == 0) {
156      result = left;
157    } else if (left.size() == 0) {
158      result = right;
159    } else {
160      int newLength = left.size() + right.size();
161      if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
162        // Optimization from BAP95: For short (leaves in paper, but just short
163        // here) total length, do a copy of data to a new leaf.
164        result = concatenateBytes(left, right);
165      } else if (leftRope != null
166          && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
167        // Optimization from BAP95: As an optimization of the case where the
168        // ByteString is constructed by repeated concatenate, recognize the case
169        // where a short string is concatenated to a left-hand node whose
170        // right-hand branch is short.  In the paper this applies to leaves, but
171        // we just look at the length here. This has the advantage of shedding
172        // references to unneeded data when substrings have been taken.
173        //
174        // When we recognize this case, we do a copy of the data and create a
175        // new parent node so that the depth of the result is the same as the
176        // given left tree.
177        ByteString newRight = concatenateBytes(leftRope.right, right);
178        result = new RopeByteString(leftRope.left, newRight);
179      } else if (leftRope != null
180          && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
181          && leftRope.getTreeDepth() > right.getTreeDepth()) {
182        // Typically for concatenate-built strings the left-side is deeper than
183        // the right.  This is our final attempt to concatenate without
184        // increasing the tree depth.  We'll redo the the node on the RHS.  This
185        // is yet another optimization for building the string by repeatedly
186        // concatenating on the right.
187        ByteString newRight = new RopeByteString(leftRope.right, right);
188        result = new RopeByteString(leftRope.left, newRight);
189      } else {
190        // Fine, we'll add a node and increase the tree depth--unless we
191        // rebalance ;^)
192        int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
193        if (newLength >= minLengthByDepth[newDepth]) {
194          // The tree is shallow enough, so don't rebalance
195          result = new RopeByteString(left, right);
196        } else {
197          result = new Balancer().balance(left, right);
198        }
199      }
200    }
201    return result;
202  }
203
204  /**
205   * Concatenates two strings by copying data values. This is called in a few
206   * cases in order to reduce the growth of the number of tree nodes.
207   *
208   * @param left  string on the left
209   * @param right string on the right
210   * @return string formed by copying data bytes
211   */
212  private static LiteralByteString concatenateBytes(ByteString left,
213      ByteString right) {
214    int leftSize = left.size();
215    int rightSize = right.size();
216    byte[] bytes = new byte[leftSize + rightSize];
217    left.copyTo(bytes, 0, 0, leftSize);
218    right.copyTo(bytes, 0, leftSize, rightSize);
219    return new LiteralByteString(bytes);  // Constructor wraps bytes
220  }
221
222  /**
223   * Create a new RopeByteString for testing only while bypassing all the
224   * defenses of {@link #concatenate(ByteString, ByteString)}. This allows
225   * testing trees of specific structure. We are also able to insert empty
226   * leaves, though these are dis-allowed, so that we can make sure the
227   * implementation can withstand their presence.
228   *
229   * @param left  string on the left of this node
230   * @param right string on the right of this node
231   * @return an unsafe instance for testing only
232   */
233  static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
234    return new RopeByteString(left, right);
235  }
236
237  /**
238   * Gets the byte at the given index.
239   * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
240   * reasons although it would more properly be {@link
241   * IndexOutOfBoundsException}.
242   *
243   * @param index index of byte
244   * @return the value
245   * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
246   */
247  @Override
248  public byte byteAt(int index) {
249    if (index < 0) {
250      throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
251    }
252    if (index > totalLength) {
253      throw new ArrayIndexOutOfBoundsException(
254          "Index > length: " + index + ", " + totalLength);
255    }
256
257    byte result;
258    // Find the relevant piece by recursive descent
259    if (index < leftLength) {
260      result = left.byteAt(index);
261    } else {
262      result = right.byteAt(index - leftLength);
263    }
264    return result;
265  }
266
267  @Override
268  public int size() {
269    return totalLength;
270  }
271
272  // =================================================================
273  // Pieces
274
275  @Override
276  protected int getTreeDepth() {
277    return treeDepth;
278  }
279
280  /**
281   * Determines if the tree is balanced according to BAP95, which means the tree
282   * is flat-enough with respect to the bounds. Note that this definition of
283   * balanced is one where sub-trees of balanced trees are not necessarily
284   * balanced.
285   *
286   * @return true if the tree is balanced
287   */
288  @Override
289  protected boolean isBalanced() {
290    return totalLength >= minLengthByDepth[treeDepth];
291  }
292
293  /**
294   * Takes a substring of this one. This involves recursive descent along the
295   * left and right edges of the substring, and referencing any wholly contained
296   * segments in between. Any leaf nodes entirely uninvolved in the substring
297   * will not be referenced by the substring.
298   *
299   * <p>Substrings of {@code length < 2} should result in at most a single
300   * recursive call chain, terminating at a leaf node. Thus the result will be a
301   * {@link LiteralByteString}. {@link #RopeByteString(ByteString,
302   * ByteString)}.
303   *
304   * @param beginIndex start at this index
305   * @param endIndex   the last character is the one before this index
306   * @return substring leaf node or tree
307   */
308  @Override
309  public ByteString substring(int beginIndex, int endIndex) {
310    if (beginIndex < 0) {
311      throw new IndexOutOfBoundsException(
312          "Beginning index: " + beginIndex + " < 0");
313    }
314    if (endIndex > totalLength) {
315      throw new IndexOutOfBoundsException(
316          "End index: " + endIndex + " > " + totalLength);
317    }
318    int substringLength = endIndex - beginIndex;
319    if (substringLength < 0) {
320      throw new IndexOutOfBoundsException(
321          "Beginning index larger than ending index: " + beginIndex + ", "
322              + endIndex);
323    }
324
325    ByteString result;
326    if (substringLength == 0) {
327      // Empty substring
328      result = ByteString.EMPTY;
329    } else if (substringLength == totalLength) {
330      // The whole string
331      result = this;
332    } else {
333      // Proper substring
334      if (endIndex <= leftLength) {
335        // Substring on the left
336        result = left.substring(beginIndex, endIndex);
337      } else if (beginIndex >= leftLength) {
338        // Substring on the right
339        result = right
340            .substring(beginIndex - leftLength, endIndex - leftLength);
341      } else {
342        // Split substring
343        ByteString leftSub = left.substring(beginIndex);
344        ByteString rightSub = right.substring(0, endIndex - leftLength);
345        // Intentionally not rebalancing, since in many cases these two
346        // substrings will already be less deep than the top-level
347        // RopeByteString we're taking a substring of.
348        result = new RopeByteString(leftSub, rightSub);
349      }
350    }
351    return result;
352  }
353
354  // =================================================================
355  // ByteString -> byte[]
356
357  @Override
358  protected void copyToInternal(byte[] target, int sourceOffset,
359      int targetOffset, int numberToCopy) {
360   if (sourceOffset + numberToCopy <= leftLength) {
361      left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
362    } else if (sourceOffset >= leftLength) {
363      right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
364          numberToCopy);
365    } else {
366      int leftLength = this.leftLength - sourceOffset;
367      left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
368      right.copyToInternal(target, 0, targetOffset + leftLength,
369          numberToCopy - leftLength);
370    }
371  }
372
373  @Override
374  public void copyTo(ByteBuffer target) {
375    left.copyTo(target);
376    right.copyTo(target);
377  }
378
379  @Override
380  public ByteBuffer asReadOnlyByteBuffer() {
381    ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
382    return byteBuffer.asReadOnlyBuffer();
383  }
384
385  @Override
386  public List<ByteBuffer> asReadOnlyByteBufferList() {
387    // Walk through the list of LiteralByteString's that make up this
388    // rope, and add each one as a read-only ByteBuffer.
389    List<ByteBuffer> result = new ArrayList<ByteBuffer>();
390    PieceIterator pieces = new PieceIterator(this);
391    while (pieces.hasNext()) {
392      LiteralByteString byteString = pieces.next();
393      result.add(byteString.asReadOnlyByteBuffer());
394    }
395    return result;
396  }
397
398  @Override
399  public void writeTo(OutputStream outputStream) throws IOException {
400    left.writeTo(outputStream);
401    right.writeTo(outputStream);
402  }
403
404  @Override
405  void writeToInternal(OutputStream out, int sourceOffset,
406      int numberToWrite) throws IOException {
407    if (sourceOffset + numberToWrite <= leftLength) {
408      left.writeToInternal(out, sourceOffset, numberToWrite);
409    } else if (sourceOffset >= leftLength) {
410      right.writeToInternal(out, sourceOffset - leftLength, numberToWrite);
411    } else {
412      int numberToWriteInLeft = leftLength - sourceOffset;
413      left.writeToInternal(out, sourceOffset, numberToWriteInLeft);
414      right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft);
415    }
416  }
417
418  @Override
419  public String toString(String charsetName)
420      throws UnsupportedEncodingException {
421    return new String(toByteArray(), charsetName);
422  }
423
424  // =================================================================
425  // UTF-8 decoding
426
427  @Override
428  public boolean isValidUtf8() {
429    int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
430    int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
431    return state == Utf8.COMPLETE;
432  }
433
434  @Override
435  protected int partialIsValidUtf8(int state, int offset, int length) {
436    int toIndex = offset + length;
437    if (toIndex <= leftLength) {
438      return left.partialIsValidUtf8(state, offset, length);
439    } else if (offset >= leftLength) {
440      return right.partialIsValidUtf8(state, offset - leftLength, length);
441    } else {
442      int leftLength = this.leftLength - offset;
443      int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
444      return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
445    }
446  }
447
448  // =================================================================
449  // equals() and hashCode()
450
451  @Override
452  public boolean equals(Object other) {
453    if (other == this) {
454      return true;
455    }
456    if (!(other instanceof ByteString)) {
457      return false;
458    }
459
460    ByteString otherByteString = (ByteString) other;
461    if (totalLength != otherByteString.size()) {
462      return false;
463    }
464    if (totalLength == 0) {
465      return true;
466    }
467
468    // You don't really want to be calling equals on long strings, but since
469    // we cache the hashCode, we effectively cache inequality. We use the cached
470    // hashCode if it's already computed.  It's arguable we should compute the
471    // hashCode here, and if we're going to be testing a bunch of byteStrings,
472    // it might even make sense.
473    if (hash != 0) {
474      int cachedOtherHash = otherByteString.peekCachedHashCode();
475      if (cachedOtherHash != 0 && hash != cachedOtherHash) {
476        return false;
477      }
478    }
479
480    return equalsFragments(otherByteString);
481  }
482
483  /**
484   * Determines if this string is equal to another of the same length by
485   * iterating over the leaf nodes. On each step of the iteration, the
486   * overlapping segments of the leaves are compared.
487   *
488   * @param other string of the same length as this one
489   * @return true if the values of this string equals the value of the given
490   *         one
491   */
492  private boolean equalsFragments(ByteString other) {
493    int thisOffset = 0;
494    Iterator<LiteralByteString> thisIter = new PieceIterator(this);
495    LiteralByteString thisString = thisIter.next();
496
497    int thatOffset = 0;
498    Iterator<LiteralByteString> thatIter = new PieceIterator(other);
499    LiteralByteString thatString = thatIter.next();
500
501    int pos = 0;
502    while (true) {
503      int thisRemaining = thisString.size() - thisOffset;
504      int thatRemaining = thatString.size() - thatOffset;
505      int bytesToCompare = Math.min(thisRemaining, thatRemaining);
506
507      // At least one of the offsets will be zero
508      boolean stillEqual = (thisOffset == 0)
509          ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
510          : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
511      if (!stillEqual) {
512        return false;
513      }
514
515      pos += bytesToCompare;
516      if (pos >= totalLength) {
517        if (pos == totalLength) {
518          return true;
519        }
520        throw new IllegalStateException();
521      }
522      // We always get to the end of at least one of the pieces
523      if (bytesToCompare == thisRemaining) { // If reached end of this
524        thisOffset = 0;
525        thisString = thisIter.next();
526      } else {
527        thisOffset += bytesToCompare;
528      }
529      if (bytesToCompare == thatRemaining) { // If reached end of that
530        thatOffset = 0;
531        thatString = thatIter.next();
532      } else {
533        thatOffset += bytesToCompare;
534      }
535    }
536  }
537
538  /**
539   * Cached hash value.  Intentionally accessed via a data race, which is safe
540   * because of the Java Memory Model's "no out-of-thin-air values" guarantees
541   * for ints.
542   */
543  private int hash = 0;
544
545  @Override
546  public int hashCode() {
547    int h = hash;
548
549    if (h == 0) {
550      h = totalLength;
551      h = partialHash(h, 0, totalLength);
552      if (h == 0) {
553        h = 1;
554      }
555      hash = h;
556    }
557    return h;
558  }
559
560  @Override
561  protected int peekCachedHashCode() {
562    return hash;
563  }
564
565  @Override
566  protected int partialHash(int h, int offset, int length) {
567    int toIndex = offset + length;
568    if (toIndex <= leftLength) {
569      return left.partialHash(h, offset, length);
570    } else if (offset >= leftLength) {
571      return right.partialHash(h, offset - leftLength, length);
572    } else {
573      int leftLength = this.leftLength - offset;
574      int leftPartial = left.partialHash(h, offset, leftLength);
575      return right.partialHash(leftPartial, 0, length - leftLength);
576    }
577  }
578
579  // =================================================================
580  // Input stream
581
582  @Override
583  public CodedInputStream newCodedInput() {
584    return CodedInputStream.newInstance(new RopeInputStream());
585  }
586
587  @Override
588  public InputStream newInput() {
589    return new RopeInputStream();
590  }
591
592  /**
593   * This class implements the balancing algorithm of BAP95. In the paper the
594   * authors use an array to keep track of pieces, while here we use a stack.
595   * The tree is balanced by traversing subtrees in left to right order, and the
596   * stack always contains the part of the string we've traversed so far.
597   *
598   * <p>One surprising aspect of the algorithm is the result of balancing is not
599   * necessarily balanced, though it is nearly balanced.  For details, see
600   * BAP95.
601   */
602  private static class Balancer {
603    // Stack containing the part of the string, starting from the left, that
604    // we've already traversed.  The final string should be the equivalent of
605    // concatenating the strings on the stack from bottom to top.
606    private final Stack<ByteString> prefixesStack = new Stack<ByteString>();
607
608    private ByteString balance(ByteString left, ByteString right) {
609      doBalance(left);
610      doBalance(right);
611
612      // Sweep stack to gather the result
613      ByteString partialString = prefixesStack.pop();
614      while (!prefixesStack.isEmpty()) {
615        ByteString newLeft = prefixesStack.pop();
616        partialString = new RopeByteString(newLeft, partialString);
617      }
618      // We should end up with a RopeByteString since at a minimum we will
619      // create one from concatenating left and right
620      return partialString;
621    }
622
623    private void doBalance(ByteString root) {
624      // BAP95: Insert balanced subtrees whole. This means the result might not
625      // be balanced, leading to repeated rebalancings on concatenate. However,
626      // these rebalancings are shallow due to ignoring balanced subtrees, and
627      // relatively few calls to insert() result.
628      if (root.isBalanced()) {
629        insert(root);
630      } else if (root instanceof RopeByteString) {
631        RopeByteString rbs = (RopeByteString) root;
632        doBalance(rbs.left);
633        doBalance(rbs.right);
634      } else {
635        throw new IllegalArgumentException(
636            "Has a new type of ByteString been created? Found " +
637                root.getClass());
638      }
639    }
640
641    /**
642     * Push a string on the balance stack (BAP95).  BAP95 uses an array and
643     * calls the elements in the array 'bins'.  We instead use a stack, so the
644     * 'bins' of lengths are represented by differences between the elements of
645     * minLengthByDepth.
646     *
647     * <p>If the length bin for our string, and all shorter length bins, are
648     * empty, we just push it on the stack.  Otherwise, we need to start
649     * concatenating, putting the given string in the "middle" and continuing
650     * until we land in an empty length bin that matches the length of our
651     * concatenation.
652     *
653     * @param byteString string to place on the balance stack
654     */
655    private void insert(ByteString byteString) {
656      int depthBin = getDepthBinForLength(byteString.size());
657      int binEnd = minLengthByDepth[depthBin + 1];
658
659      // BAP95: Concatenate all trees occupying bins representing the length of
660      // our new piece or of shorter pieces, to the extent that is possible.
661      // The goal is to clear the bin which our piece belongs in, but that may
662      // not be entirely possible if there aren't enough longer bins occupied.
663      if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
664        prefixesStack.push(byteString);
665      } else {
666        int binStart = minLengthByDepth[depthBin];
667
668        // Concatenate the subtrees of shorter length
669        ByteString newTree = prefixesStack.pop();
670        while (!prefixesStack.isEmpty()
671            && prefixesStack.peek().size() < binStart) {
672          ByteString left = prefixesStack.pop();
673          newTree = new RopeByteString(left, newTree);
674        }
675
676        // Concatenate the given string
677        newTree = new RopeByteString(newTree, byteString);
678
679        // Continue concatenating until we land in an empty bin
680        while (!prefixesStack.isEmpty()) {
681          depthBin = getDepthBinForLength(newTree.size());
682          binEnd = minLengthByDepth[depthBin + 1];
683          if (prefixesStack.peek().size() < binEnd) {
684            ByteString left = prefixesStack.pop();
685            newTree = new RopeByteString(left, newTree);
686          } else {
687            break;
688          }
689        }
690        prefixesStack.push(newTree);
691      }
692    }
693
694    private int getDepthBinForLength(int length) {
695      int depth = Arrays.binarySearch(minLengthByDepth, length);
696      if (depth < 0) {
697        // It wasn't an exact match, so convert to the index of the containing
698        // fragment, which is one less even than the insertion point.
699        int insertionPoint = -(depth + 1);
700        depth = insertionPoint - 1;
701      }
702
703      return depth;
704    }
705  }
706
707  /**
708   * This class is a continuable tree traversal, which keeps the state
709   * information which would exist on the stack in a recursive traversal instead
710   * on a stack of "Bread Crumbs". The maximum depth of the stack in this
711   * iterator is the same as the depth of the tree being traversed.
712   *
713   * <p>This iterator is used to implement
714   * {@link RopeByteString#equalsFragments(ByteString)}.
715   */
716  private static class PieceIterator implements Iterator<LiteralByteString> {
717
718    private final Stack<RopeByteString> breadCrumbs =
719        new Stack<RopeByteString>();
720    private LiteralByteString next;
721
722    private PieceIterator(ByteString root) {
723      next = getLeafByLeft(root);
724    }
725
726    private LiteralByteString getLeafByLeft(ByteString root) {
727      ByteString pos = root;
728      while (pos instanceof RopeByteString) {
729        RopeByteString rbs = (RopeByteString) pos;
730        breadCrumbs.push(rbs);
731        pos = rbs.left;
732      }
733      return (LiteralByteString) pos;
734    }
735
736    private LiteralByteString getNextNonEmptyLeaf() {
737      while (true) {
738        // Almost always, we go through this loop exactly once.  However, if
739        // we discover an empty string in the rope, we toss it and try again.
740        if (breadCrumbs.isEmpty()) {
741          return null;
742        } else {
743          LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right);
744          if (!result.isEmpty()) {
745            return result;
746          }
747        }
748      }
749    }
750
751    public boolean hasNext() {
752      return next != null;
753    }
754
755    /**
756     * Returns the next item and advances one {@code LiteralByteString}.
757     *
758     * @return next non-empty LiteralByteString or {@code null}
759     */
760    public LiteralByteString next() {
761      if (next == null) {
762        throw new NoSuchElementException();
763      }
764      LiteralByteString result = next;
765      next = getNextNonEmptyLeaf();
766      return result;
767    }
768
769    public void remove() {
770      throw new UnsupportedOperationException();
771    }
772  }
773
774  // =================================================================
775  // ByteIterator
776
777  @Override
778  public ByteIterator iterator() {
779    return new RopeByteIterator();
780  }
781
782  private class RopeByteIterator implements ByteString.ByteIterator {
783
784    private final PieceIterator pieces;
785    private ByteIterator bytes;
786    int bytesRemaining;
787
788    private RopeByteIterator() {
789      pieces = new PieceIterator(RopeByteString.this);
790      bytes = pieces.next().iterator();
791      bytesRemaining = size();
792    }
793
794    public boolean hasNext() {
795      return (bytesRemaining > 0);
796    }
797
798    public Byte next() {
799      return nextByte(); // Does not instantiate a Byte
800    }
801
802    public byte nextByte() {
803      if (!bytes.hasNext()) {
804        bytes = pieces.next().iterator();
805      }
806      --bytesRemaining;
807      return bytes.nextByte();
808    }
809
810    public void remove() {
811      throw new UnsupportedOperationException();
812    }
813  }
814
815  /**
816   * This class is the {@link RopeByteString} equivalent for
817   * {@link ByteArrayInputStream}.
818   */
819  private class RopeInputStream extends InputStream {
820    // Iterates through the pieces of the rope
821    private PieceIterator pieceIterator;
822    // The current piece
823    private LiteralByteString currentPiece;
824    // The size of the current piece
825    private int currentPieceSize;
826    // The index of the next byte to read in the current piece
827    private int currentPieceIndex;
828    // The offset of the start of the current piece in the rope byte string
829    private int currentPieceOffsetInRope;
830    // Offset in the buffer at which user called mark();
831    private int mark;
832
833    public RopeInputStream() {
834      initialize();
835    }
836
837    @Override
838    public int read(byte b[], int offset, int length)  {
839      if (b == null) {
840        throw new NullPointerException();
841      } else if (offset < 0 || length < 0 || length > b.length - offset) {
842        throw new IndexOutOfBoundsException();
843      }
844      return readSkipInternal(b, offset, length);
845    }
846
847    @Override
848    public long skip(long length) {
849      if (length < 0) {
850        throw new IndexOutOfBoundsException();
851      } else if (length > Integer.MAX_VALUE) {
852        length = Integer.MAX_VALUE;
853      }
854      return readSkipInternal(null, 0, (int) length);
855    }
856
857    /**
858     * Internal implementation of read and skip.  If b != null, then read the
859     * next {@code length} bytes into the buffer {@code b} at
860     * offset {@code offset}.  If b == null, then skip the next {@code length)
861     * bytes.
862     * <p>
863     * This method assumes that all error checking has already happened.
864     * <p>
865     * Returns the actual number of bytes read or skipped.
866     */
867    private int readSkipInternal(byte b[], int offset, int length)  {
868      int bytesRemaining = length;
869      while (bytesRemaining > 0) {
870        advanceIfCurrentPieceFullyRead();
871        if (currentPiece == null) {
872          if (bytesRemaining == length) {
873             // We didn't manage to read anything
874             return -1;
875           }
876          break;
877        } else {
878          // Copy the bytes from this piece.
879          int currentPieceRemaining = currentPieceSize - currentPieceIndex;
880          int count = Math.min(currentPieceRemaining, bytesRemaining);
881          if (b != null) {
882            currentPiece.copyTo(b, currentPieceIndex, offset, count);
883            offset += count;
884          }
885          currentPieceIndex += count;
886          bytesRemaining -= count;
887        }
888      }
889       // Return the number of bytes read.
890      return length - bytesRemaining;
891    }
892
893    @Override
894    public int read() throws IOException {
895      advanceIfCurrentPieceFullyRead();
896      if (currentPiece == null) {
897        return -1;
898      } else {
899        return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
900      }
901    }
902
903    @Override
904    public int available() throws IOException {
905      int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
906      return RopeByteString.this.size() - bytesRead;
907    }
908
909    @Override
910    public boolean markSupported() {
911      return true;
912    }
913
914    @Override
915    public void mark(int readAheadLimit) {
916      // Set the mark to our position in the byte string
917      mark = currentPieceOffsetInRope + currentPieceIndex;
918    }
919
920    @Override
921    public synchronized void reset() {
922      // Just reinitialize and skip the specified number of bytes.
923      initialize();
924      readSkipInternal(null, 0, mark);
925    }
926
927    /** Common initialization code used by both the constructor and reset() */
928    private void initialize() {
929      pieceIterator = new PieceIterator(RopeByteString.this);
930      currentPiece = pieceIterator.next();
931      currentPieceSize = currentPiece.size();
932      currentPieceIndex = 0;
933      currentPieceOffsetInRope = 0;
934    }
935
936    /**
937     * Skips to the next piece if we have read all the data in the current
938     * piece.  Sets currentPiece to null if we have reached the end of the
939     * input.
940     */
941    private void advanceIfCurrentPieceFullyRead() {
942      if (currentPiece != null && currentPieceIndex == currentPieceSize) {
943        // Generally, we can only go through this loop at most once, since
944        // empty strings can't end up in a rope.  But better to test.
945        currentPieceOffsetInRope += currentPieceSize;
946        currentPieceIndex = 0;
947        if (pieceIterator.hasNext()) {
948          currentPiece = pieceIterator.next();
949          currentPieceSize = currentPiece.size();
950        } else {
951          currentPiece = null;
952          currentPieceSize = 0;
953        }
954      }
955    }
956  }
957}
958