1bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor/*
21d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Copyright (C) 2009 The Guava Authors
3bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
4bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Licensed under the Apache License, Version 2.0 (the "License");
5bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * you may not use this file except in compliance with the License.
6bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * You may obtain a copy of the License at
7bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
8bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * http://www.apache.org/licenses/LICENSE-2.0
9bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
10bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Unless required by applicable law or agreed to in writing, software
11bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * distributed under the License is distributed on an "AS IS" BASIS,
12bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * See the License for the specific language governing permissions and
14bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * limitations under the License.
15bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */
16bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
17bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorpackage com.google.common.base;
18bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
19bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport static com.google.common.base.Preconditions.checkArgument;
20bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport static com.google.common.base.Preconditions.checkNotNull;
21bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.Beta;
231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.GwtCompatible;
241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.GwtIncompatible;
251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
260888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.ArrayList;
271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport java.util.Collections;
28bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.Iterator;
291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport java.util.LinkedHashMap;
300888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.List;
311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport java.util.Map;
32bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.regex.Matcher;
33bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.regex.Pattern;
341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport javax.annotation.CheckReturnValue;
36bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
37bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor/**
387dd252788645e940eada959bdde927426e2531c9Paul Duffin * Extracts non-overlapping substrings from an input string, typically by
397dd252788645e940eada959bdde927426e2531c9Paul Duffin * recognizing appearances of a <i>separator</i> sequence. This separator can be
407dd252788645e940eada959bdde927426e2531c9Paul Duffin * specified as a single {@linkplain #on(char) character}, fixed {@linkplain
417dd252788645e940eada959bdde927426e2531c9Paul Duffin * #on(String) string}, {@linkplain #onPattern regular expression} or {@link
427dd252788645e940eada959bdde927426e2531c9Paul Duffin * #on(CharMatcher) CharMatcher} instance. Or, instead of using a separator at
437dd252788645e940eada959bdde927426e2531c9Paul Duffin * all, a splitter can extract adjacent substrings of a given {@linkplain
447dd252788645e940eada959bdde927426e2531c9Paul Duffin * #fixedLength fixed length}.
45bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
467dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>For example, this expression: <pre>   {@code
47bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
487dd252788645e940eada959bdde927426e2531c9Paul Duffin *   Splitter.on(',').split("foo,bar,qux")}</pre>
49bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
507dd252788645e940eada959bdde927426e2531c9Paul Duffin * ... produces an {@code Iterable} containing {@code "foo"}, {@code "bar"} and
517dd252788645e940eada959bdde927426e2531c9Paul Duffin * {@code "qux"}, in that order.
52bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
537dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>By default, {@code Splitter}'s behavior is simplistic and unassuming. The
547dd252788645e940eada959bdde927426e2531c9Paul Duffin * following expression: <pre>   {@code
55bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
567dd252788645e940eada959bdde927426e2531c9Paul Duffin *   Splitter.on(',').split(" foo,,,  bar ,")}</pre>
57bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
587dd252788645e940eada959bdde927426e2531c9Paul Duffin * ... yields the substrings {@code [" foo", "", "", "  bar ", ""]}. If this
597dd252788645e940eada959bdde927426e2531c9Paul Duffin * is not the desired behavior, use configuration methods to obtain a <i>new</i>
607dd252788645e940eada959bdde927426e2531c9Paul Duffin * splitter instance with modified behavior: <pre>   {@code
61bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
62bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *   private static final Splitter MY_SPLITTER = Splitter.on(',')
63bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *       .trimResults()
64bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *       .omitEmptyStrings();}</pre>
65bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
660888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>Now {@code MY_SPLITTER.split("foo,,,  bar ,")} returns just {@code ["foo",
677dd252788645e940eada959bdde927426e2531c9Paul Duffin * "bar"]}. Note that the order in which these configuration methods are called
687dd252788645e940eada959bdde927426e2531c9Paul Duffin * is never significant.
69bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
707dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p><b>Warning:</b> Splitter instances are immutable. Invoking a configuration
717dd252788645e940eada959bdde927426e2531c9Paul Duffin * method has no effect on the receiving instance; you must store and use the
727dd252788645e940eada959bdde927426e2531c9Paul Duffin * new splitter instance it returns instead. <pre>   {@code
73bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
747dd252788645e940eada959bdde927426e2531c9Paul Duffin *   // Do NOT do this
75bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *   Splitter splitter = Splitter.on('/');
76bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *   splitter.trimResults(); // does nothing!
77bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *   return splitter.split("wrong / wrong / wrong");}</pre>
78bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
797dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>For separator-based splitters that do not use {@code omitEmptyStrings}, an
807dd252788645e940eada959bdde927426e2531c9Paul Duffin * input string containing {@code n} occurrences of the separator naturally
817dd252788645e940eada959bdde927426e2531c9Paul Duffin * yields an iterable of size {@code n + 1}. So if the separator does not occur
827dd252788645e940eada959bdde927426e2531c9Paul Duffin * anywhere in the input, a single substring is returned containing the entire
837dd252788645e940eada959bdde927426e2531c9Paul Duffin * input. Consequently, all splitters split the empty string to {@code [""]}
847dd252788645e940eada959bdde927426e2531c9Paul Duffin * (note: even fixed-length splitters).
85bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
 * <p>Splitter instances are immutable and thread-safe, and are therefore safe
 * to store as {@code static final} constants.
887dd252788645e940eada959bdde927426e2531c9Paul Duffin *
897dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>The {@link Joiner} class provides the inverse operation to splitting, but
907dd252788645e940eada959bdde927426e2531c9Paul Duffin * note that a round-trip between the two should be assumed to be lossy.
917dd252788645e940eada959bdde927426e2531c9Paul Duffin *
927dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>See the Guava User Guide article on <a href=
937dd252788645e940eada959bdde927426e2531c9Paul Duffin * "http://code.google.com/p/guava-libraries/wiki/StringsExplained#Splitter">
947dd252788645e940eada959bdde927426e2531c9Paul Duffin * {@code Splitter}</a>.
95bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor *
96bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @author Julien Silland
97bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @author Jesse Wilson
98bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @author Kevin Bourrillion
991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @author Louis Wasserman
1001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @since 1.0
101bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */
1021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert@GwtCompatible(emulated = true)
103bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorpublic final class Splitter {
104bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private final CharMatcher trimmer;
105bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private final boolean omitEmptyStrings;
106bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private final Strategy strategy;
1071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  private final int limit;
108bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
109bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private Splitter(Strategy strategy) {
1101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    this(strategy, false, CharMatcher.NONE, Integer.MAX_VALUE);
111bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
112bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1130888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private Splitter(Strategy strategy, boolean omitEmptyStrings,
1140888a09821a98ac0680fad765217302858e70fa4Paul Duffin      CharMatcher trimmer, int limit) {
115bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    this.strategy = strategy;
116bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    this.omitEmptyStrings = omitEmptyStrings;
117bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    this.trimmer = trimmer;
1181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    this.limit = limit;
119bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
120bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
121bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
122bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that uses the given single-character separator. For
123bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * example, {@code Splitter.on(',').split("foo,,bar")} returns an iterable
124bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * containing {@code ["foo", "", "bar"]}.
125bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
126bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param separator the character to recognize as a separator
127bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter, with default settings, that recognizes that separator
128bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
129bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static Splitter on(char separator) {
130bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return on(CharMatcher.is(separator));
131bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
132bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
133bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
134bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that considers any single character matched by the
135bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * given {@code CharMatcher} to be a separator. For example, {@code
136bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Splitter.on(CharMatcher.anyOf(";,")).split("foo,;bar,quux")} returns an
137bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * iterable containing {@code ["foo", "", "bar", "quux"]}.
138bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
139bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param separatorMatcher a {@link CharMatcher} that determines whether a
140bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     character is a separator
141bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter, with default settings, that uses this matcher
142bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
143bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static Splitter on(final CharMatcher separatorMatcher) {
144bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkNotNull(separatorMatcher);
145bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
146bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new Splitter(new Strategy() {
1470888a09821a98ac0680fad765217302858e70fa4Paul Duffin      @Override public SplittingIterator iterator(
1480888a09821a98ac0680fad765217302858e70fa4Paul Duffin          Splitter splitter, final CharSequence toSplit) {
149bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return new SplittingIterator(splitter, toSplit) {
1500888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override int separatorStart(int start) {
151bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return separatorMatcher.indexIn(toSplit, start);
152bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
153bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1540888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override int separatorEnd(int separatorPosition) {
155bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return separatorPosition + 1;
156bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
157bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        };
158bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
159bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    });
160bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
161bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
162bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
163bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that uses the given fixed string as a separator. For
1647dd252788645e940eada959bdde927426e2531c9Paul Duffin   * example, {@code Splitter.on(", ").split("foo, bar,baz")} returns an
1657dd252788645e940eada959bdde927426e2531c9Paul Duffin   * iterable containing {@code ["foo", "bar,baz"]}.
166bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
167bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param separator the literal, nonempty string to recognize as a separator
168bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter, with default settings, that recognizes that separator
169bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
170bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static Splitter on(final String separator) {
1710888a09821a98ac0680fad765217302858e70fa4Paul Duffin    checkArgument(separator.length() != 0,
1720888a09821a98ac0680fad765217302858e70fa4Paul Duffin        "The separator may not be the empty string.");
173bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
174bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new Splitter(new Strategy() {
1750888a09821a98ac0680fad765217302858e70fa4Paul Duffin      @Override public SplittingIterator iterator(
1760888a09821a98ac0680fad765217302858e70fa4Paul Duffin          Splitter splitter, CharSequence toSplit) {
177bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return new SplittingIterator(splitter, toSplit) {
1780888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override public int separatorStart(int start) {
1790888a09821a98ac0680fad765217302858e70fa4Paul Duffin            int separatorLength = separator.length();
180bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1810888a09821a98ac0680fad765217302858e70fa4Paul Duffin            positions:
1820888a09821a98ac0680fad765217302858e70fa4Paul Duffin            for (int p = start, last = toSplit.length() - separatorLength;
1830888a09821a98ac0680fad765217302858e70fa4Paul Duffin                p <= last; p++) {
1840888a09821a98ac0680fad765217302858e70fa4Paul Duffin              for (int i = 0; i < separatorLength; i++) {
185bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor                if (toSplit.charAt(i + p) != separator.charAt(i)) {
186bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor                  continue positions;
187bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor                }
188bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor              }
189bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor              return p;
190bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            }
191bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return -1;
192bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
193bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1940888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override public int separatorEnd(int separatorPosition) {
195bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return separatorPosition + separator.length();
196bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
197bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        };
198bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
199bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    });
200bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
201bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
202bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
203bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that considers any subsequence matching {@code
204bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * pattern} to be a separator. For example, {@code
205bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Splitter.on(Pattern.compile("\r?\n")).split(entireFile)} splits a string
206bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * into lines whether it uses DOS-style or UNIX-style line terminators.
207bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
208bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param separatorPattern the pattern that determines whether a subsequence
209bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     is a separator. This pattern may not match the empty string.
210bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter, with default settings, that uses this pattern
211bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @throws IllegalArgumentException if {@code separatorPattern} matches the
212bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     empty string
213bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
2141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @GwtIncompatible("java.util.regex")
215bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static Splitter on(final Pattern separatorPattern) {
216bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkNotNull(separatorPattern);
217bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkArgument(!separatorPattern.matcher("").matches(),
218bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        "The pattern may not match the empty string: %s", separatorPattern);
219bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
220bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new Splitter(new Strategy() {
2210888a09821a98ac0680fad765217302858e70fa4Paul Duffin      @Override public SplittingIterator iterator(
2220888a09821a98ac0680fad765217302858e70fa4Paul Duffin          final Splitter splitter, CharSequence toSplit) {
223bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        final Matcher matcher = separatorPattern.matcher(toSplit);
224bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return new SplittingIterator(splitter, toSplit) {
2250888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override public int separatorStart(int start) {
226bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return matcher.find(start) ? matcher.start() : -1;
227bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
228bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
2290888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override public int separatorEnd(int separatorPosition) {
230bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return matcher.end();
231bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
232bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        };
233bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
234bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    });
235bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
236bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
237bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
238bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that considers any subsequence matching a given
239bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * pattern (regular expression) to be a separator. For example, {@code
240bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Splitter.onPattern("\r?\n").split(entireFile)} splits a string into lines
241bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * whether it uses DOS-style or UNIX-style line terminators. This is
242bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * equivalent to {@code Splitter.on(Pattern.compile(pattern))}.
243bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
244bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param separatorPattern the pattern that determines whether a subsequence
245bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     is a separator. This pattern may not match the empty string.
246bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter, with default settings, that uses this pattern
2471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @throws java.util.regex.PatternSyntaxException if {@code separatorPattern}
2481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *     is a malformed expression
249bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @throws IllegalArgumentException if {@code separatorPattern} matches the
250bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     empty string
251bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
2521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @GwtIncompatible("java.util.regex")
253bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static Splitter onPattern(String separatorPattern) {
254bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return on(Pattern.compile(separatorPattern));
255bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
256bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
257bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
258bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that divides strings into pieces of the given length.
2591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * For example, {@code Splitter.fixedLength(2).split("abcde")} returns an
260bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * iterable containing {@code ["ab", "cd", "e"]}. The last piece can be
261bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * smaller than {@code length} but will never be empty.
262bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
2637dd252788645e940eada959bdde927426e2531c9Paul Duffin   * <p><b>Exception:</b> for consistency with separator-based splitters, {@code
2647dd252788645e940eada959bdde927426e2531c9Paul Duffin   * split("")} does not yield an empty iterable, but an iterable containing
2657dd252788645e940eada959bdde927426e2531c9Paul Duffin   * {@code ""}. This is the only case in which {@code
2667dd252788645e940eada959bdde927426e2531c9Paul Duffin   * Iterables.size(split(input))} does not equal {@code
2677dd252788645e940eada959bdde927426e2531c9Paul Duffin   * IntMath.divide(input.length(), length, CEILING)}. To avoid this behavior,
2687dd252788645e940eada959bdde927426e2531c9Paul Duffin   * use {@code omitEmptyStrings}.
2697dd252788645e940eada959bdde927426e2531c9Paul Duffin   *
2707dd252788645e940eada959bdde927426e2531c9Paul Duffin   * @param length the desired length of pieces after splitting, a positive
2717dd252788645e940eada959bdde927426e2531c9Paul Duffin   *     integer
272bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter, with default settings, that can split into fixed sized
273bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     pieces
2747dd252788645e940eada959bdde927426e2531c9Paul Duffin   * @throws IllegalArgumentException if {@code length} is zero or negative
275bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
276bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static Splitter fixedLength(final int length) {
277bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkArgument(length > 0, "The length may not be less than 1");
278bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
279bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new Splitter(new Strategy() {
2800888a09821a98ac0680fad765217302858e70fa4Paul Duffin      @Override public SplittingIterator iterator(
2810888a09821a98ac0680fad765217302858e70fa4Paul Duffin          final Splitter splitter, CharSequence toSplit) {
282bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return new SplittingIterator(splitter, toSplit) {
2830888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override public int separatorStart(int start) {
284bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            int nextChunkStart = start + length;
285bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return (nextChunkStart < toSplit.length() ? nextChunkStart : -1);
286bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
287bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
2880888a09821a98ac0680fad765217302858e70fa4Paul Duffin          @Override public int separatorEnd(int separatorPosition) {
289bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return separatorPosition;
290bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
291bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        };
292bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
293bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    });
294bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
295bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
296bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
297bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that behaves equivalently to {@code this} splitter, but
298bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * automatically omits empty strings from the results. For example, {@code
299bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an
300bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * iterable containing only {@code ["a", "b", "c"]}.
301bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
302bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * <p>If either {@code trimResults} option is also specified when creating a
303bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * splitter, that splitter always trims results first before checking for
304bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * emptiness. So, for example, {@code
305bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns
306bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * an empty iterable.
307bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
308bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * <p>Note that it is ordinarily not possible for {@link #split(CharSequence)}
309bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * to return an empty iterable, but when using this option, it can (if the
310bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * input sequence consists of nothing but separators).
311bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
312bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter with the desired configuration
313bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
3141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
315bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public Splitter omitEmptyStrings() {
3161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return new Splitter(strategy, true, trimmer, limit);
3171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
3181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
3201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a splitter that behaves equivalently to {@code this} splitter but
3211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * stops splitting after it reaches the limit.
3221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * The limit defines the maximum number of items returned by the iterator.
3231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
3241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>For example,
3251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * {@code Splitter.on(',').limit(3).split("a,b,c,d")} returns an iterable
3261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * containing {@code ["a", "b", "c,d"]}.  When omitting empty strings, the
3271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * omitted strings do no count.  Hence,
3281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * {@code Splitter.on(',').limit(3).omitEmptyStrings().split("a,,,b,,,c,d")}
3291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * returns an iterable containing {@code ["a", "b", "c,d"}.
3301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * When trim is requested, all entries, including the last are trimmed.  Hence
3311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * {@code Splitter.on(',').limit(3).trimResults().split(" a , b , c , d ")}
3321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * results in @{code ["a", "b", "c , d"]}.
3331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
3341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param limit the maximum number of items returns
3351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @return a splitter with the desired configuration
3361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @since 9.0
3371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
3381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
3391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public Splitter limit(int limit) {
3401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    checkArgument(limit > 0, "must be greater than zero: %s", limit);
3411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return new Splitter(strategy, omitEmptyStrings, trimmer, limit);
342bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
343bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
344bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
345bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that behaves equivalently to {@code this} splitter, but
346bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * automatically removes leading and trailing {@linkplain
347bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * CharMatcher#WHITESPACE whitespace} from each returned substring; equivalent
348bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * to {@code trimResults(CharMatcher.WHITESPACE)}. For example, {@code
3491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Splitter.on(',').trimResults().split(" a, b ,c ")} returns an iterable
350bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * containing {@code ["a", "b", "c"]}.
351bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
352bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter with the desired configuration
353bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
3541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
355bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public Splitter trimResults() {
356bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return trimResults(CharMatcher.WHITESPACE);
357bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
358bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
359bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
360bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a splitter that behaves equivalently to {@code this} splitter, but
361bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * removes all leading or trailing characters matching the given {@code
362bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * CharMatcher} from each returned substring. For example, {@code
363bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Splitter.on(',').trimResults(CharMatcher.is('_')).split("_a ,_b_ ,c__")}
364bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * returns an iterable containing {@code ["a ", "b_ ", "c"]}.
365bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
366bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param trimmer a {@link CharMatcher} that determines whether a character
367bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *     should be removed from the beginning/end of a subsequence
368bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return a splitter with the desired configuration
369bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
3701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  // TODO(kevinb): throw if a trimmer was already specified!
3711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
372bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public Splitter trimResults(CharMatcher trimmer) {
373bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkNotNull(trimmer);
3741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return new Splitter(strategy, omitEmptyStrings, trimmer, limit);
375bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
376bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
377bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
3781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Splits {@code sequence} into string components and makes them available
3790888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * through an {@link Iterator}, which may be lazily evaluated. If you want
3800888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * an eagerly computed {@link List}, use {@link #splitToList(CharSequence)}.
381bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
382bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the sequence of characters to split
383bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return an iteration over the segments split from the parameter.
384bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
385bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public Iterable<String> split(final CharSequence sequence) {
386bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkNotNull(sequence);
387bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
388bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new Iterable<String>() {
3890888a09821a98ac0680fad765217302858e70fa4Paul Duffin      @Override public Iterator<String> iterator() {
3900888a09821a98ac0680fad765217302858e70fa4Paul Duffin        return splittingIterator(sequence);
391bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
3920888a09821a98ac0680fad765217302858e70fa4Paul Duffin      @Override public String toString() {
3930888a09821a98ac0680fad765217302858e70fa4Paul Duffin        return Joiner.on(", ")
3940888a09821a98ac0680fad765217302858e70fa4Paul Duffin            .appendTo(new StringBuilder().append('['), this)
3950888a09821a98ac0680fad765217302858e70fa4Paul Duffin            .append(']')
3967dd252788645e940eada959bdde927426e2531c9Paul Duffin            .toString();
3977dd252788645e940eada959bdde927426e2531c9Paul Duffin      }
398bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
399bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
400bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
4010888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private Iterator<String> splittingIterator(CharSequence sequence) {
4021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return strategy.iterator(this, sequence);
4031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
4041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
4060888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * Splits {@code sequence} into string components and returns them as
4070888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * an immutable list. If you want an {@link Iterable} which may be lazily
4080888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * evaluated, use {@link #split(CharSequence)}.
4090888a09821a98ac0680fad765217302858e70fa4Paul Duffin   *
4100888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param sequence the sequence of characters to split
4110888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @return an immutable list of the segments split from the parameter
4120888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @since 15.0
4130888a09821a98ac0680fad765217302858e70fa4Paul Duffin   */
4140888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @Beta
4150888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public List<String> splitToList(CharSequence sequence) {
4160888a09821a98ac0680fad765217302858e70fa4Paul Duffin    checkNotNull(sequence);
4170888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4180888a09821a98ac0680fad765217302858e70fa4Paul Duffin    Iterator<String> iterator = splittingIterator(sequence);
4190888a09821a98ac0680fad765217302858e70fa4Paul Duffin    List<String> result = new ArrayList<String>();
4200888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4210888a09821a98ac0680fad765217302858e70fa4Paul Duffin    while (iterator.hasNext()) {
4220888a09821a98ac0680fad765217302858e70fa4Paul Duffin      result.add(iterator.next());
4230888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
4240888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4250888a09821a98ac0680fad765217302858e70fa4Paul Duffin    return Collections.unmodifiableList(result);
4260888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
4270888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4280888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /**
4291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code MapSplitter} which splits entries based on this splitter,
4301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * and splits entries into keys and values using the specified separator.
4311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
4321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @since 10.0
4331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
4341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
4351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @Beta
4361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public MapSplitter withKeyValueSeparator(String separator) {
4371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return withKeyValueSeparator(on(separator));
4381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
4391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
4411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code MapSplitter} which splits entries based on this splitter,
4427dd252788645e940eada959bdde927426e2531c9Paul Duffin   * and splits entries into keys and values using the specified separator.
4437dd252788645e940eada959bdde927426e2531c9Paul Duffin   *
4447dd252788645e940eada959bdde927426e2531c9Paul Duffin   * @since 14.0
4457dd252788645e940eada959bdde927426e2531c9Paul Duffin   */
4467dd252788645e940eada959bdde927426e2531c9Paul Duffin  @CheckReturnValue
4477dd252788645e940eada959bdde927426e2531c9Paul Duffin  @Beta
4487dd252788645e940eada959bdde927426e2531c9Paul Duffin  public MapSplitter withKeyValueSeparator(char separator) {
4497dd252788645e940eada959bdde927426e2531c9Paul Duffin    return withKeyValueSeparator(on(separator));
4507dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
4517dd252788645e940eada959bdde927426e2531c9Paul Duffin
4527dd252788645e940eada959bdde927426e2531c9Paul Duffin  /**
4537dd252788645e940eada959bdde927426e2531c9Paul Duffin   * Returns a {@code MapSplitter} which splits entries based on this splitter,
4541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * and splits entries into keys and values using the specified key-value
4551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * splitter.
4561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
4571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @since 10.0
4581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
4591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
4601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @Beta
4611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public MapSplitter withKeyValueSeparator(Splitter keyValueSplitter) {
4621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return new MapSplitter(this, keyValueSplitter);
4631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
4641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
4661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * An object that splits strings into maps as {@code Splitter} splits
4671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * iterables and lists. Like {@code Splitter}, it is thread-safe and
4681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * immutable.
4691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
4701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @since 10.0
4711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
4721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @Beta
4731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public static final class MapSplitter {
4740888a09821a98ac0680fad765217302858e70fa4Paul Duffin    private static final String INVALID_ENTRY_MESSAGE =
4750888a09821a98ac0680fad765217302858e70fa4Paul Duffin        "Chunk [%s] is not a valid entry";
4761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    private final Splitter outerSplitter;
4771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    private final Splitter entrySplitter;
4781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    private MapSplitter(Splitter outerSplitter, Splitter entrySplitter) {
4801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      this.outerSplitter = outerSplitter; // only "this" is passed
4811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      this.entrySplitter = checkNotNull(entrySplitter);
4821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
4831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    /**
4851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * Splits {@code sequence} into substrings, splits each substring into
4861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * an entry, and returns an unmodifiable map with each of the entries. For
4871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * example, <code>
4881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * Splitter.on(';').trimResults().withKeyValueSeparator("=>")
4891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * .split("a=>b ; c=>b")
4901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * </code> will return a mapping from {@code "a"} to {@code "b"} and
4911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * {@code "c"} to {@code b}.
4921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     *
4931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * <p>The returned map preserves the order of the entries from
4941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * {@code sequence}.
4951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     *
4961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     * @throws IllegalArgumentException if the specified sequence does not split
4971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     *         into valid map entries, or if there are duplicate keys
4981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert     */
4991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    public Map<String, String> split(CharSequence sequence) {
5001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      Map<String, String> map = new LinkedHashMap<String, String>();
5011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      for (String entry : outerSplitter.split(sequence)) {
5020888a09821a98ac0680fad765217302858e70fa4Paul Duffin        Iterator<String> entryFields = entrySplitter.splittingIterator(entry);
5031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
5041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        checkArgument(entryFields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
5051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        String key = entryFields.next();
5061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        checkArgument(!map.containsKey(key), "Duplicate key [%s] found.", key);
5071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
5081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        checkArgument(entryFields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
5091d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        String value = entryFields.next();
5101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        map.put(key, value);
5111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
5121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        checkArgument(!entryFields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
5131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      }
5141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      return Collections.unmodifiableMap(map);
5151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
5161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
5171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
518bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private interface Strategy {
519bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    Iterator<String> iterator(Splitter splitter, CharSequence toSplit);
520bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
521bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
5227dd252788645e940eada959bdde927426e2531c9Paul Duffin  private abstract static class SplittingIterator extends AbstractIterator<String> {
523bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    final CharSequence toSplit;
524bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    final CharMatcher trimmer;
525bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    final boolean omitEmptyStrings;
526bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
527bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    /**
528bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     * Returns the first index in {@code toSplit} at or after {@code start}
529bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     * that contains the separator.
530bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     */
531bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    abstract int separatorStart(int start);
532bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
533bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    /**
534bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     * Returns the first index in {@code toSplit} after {@code
535bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     * separatorPosition} that does not contain a separator. This method is only
536bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     * invoked after a call to {@code separatorStart}.
537bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor     */
538bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    abstract int separatorEnd(int separatorPosition);
539bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
540bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int offset = 0;
5411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    int limit;
542bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
543bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    protected SplittingIterator(Splitter splitter, CharSequence toSplit) {
544bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      this.trimmer = splitter.trimmer;
545bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      this.omitEmptyStrings = splitter.omitEmptyStrings;
5461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      this.limit = splitter.limit;
547bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      this.toSplit = toSplit;
548bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
549bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
5500888a09821a98ac0680fad765217302858e70fa4Paul Duffin    @Override protected String computeNext() {
5517dd252788645e940eada959bdde927426e2531c9Paul Duffin      /*
5527dd252788645e940eada959bdde927426e2531c9Paul Duffin       * The returned string will be from the end of the last match to the
5537dd252788645e940eada959bdde927426e2531c9Paul Duffin       * beginning of the next one. nextStart is the start position of the
5547dd252788645e940eada959bdde927426e2531c9Paul Duffin       * returned substring, while offset is the place to start looking for a
5557dd252788645e940eada959bdde927426e2531c9Paul Duffin       * separator.
5567dd252788645e940eada959bdde927426e2531c9Paul Duffin       */
5577dd252788645e940eada959bdde927426e2531c9Paul Duffin      int nextStart = offset;
558bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      while (offset != -1) {
5597dd252788645e940eada959bdde927426e2531c9Paul Duffin        int start = nextStart;
560bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        int end;
561bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
562bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        int separatorPosition = separatorStart(offset);
563bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (separatorPosition == -1) {
564bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          end = toSplit.length();
565bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          offset = -1;
566bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        } else {
567bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          end = separatorPosition;
568bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          offset = separatorEnd(separatorPosition);
569bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
5707dd252788645e940eada959bdde927426e2531c9Paul Duffin        if (offset == nextStart) {
5717dd252788645e940eada959bdde927426e2531c9Paul Duffin          /*
5727dd252788645e940eada959bdde927426e2531c9Paul Duffin           * This occurs when some pattern has an empty match, even if it
5737dd252788645e940eada959bdde927426e2531c9Paul Duffin           * doesn't match the empty string -- for example, if it requires
5747dd252788645e940eada959bdde927426e2531c9Paul Duffin           * lookahead or the like. The offset must be increased to look for
5757dd252788645e940eada959bdde927426e2531c9Paul Duffin           * separators beyond this point, without changing the start position
5767dd252788645e940eada959bdde927426e2531c9Paul Duffin           * of the next returned substring -- so nextStart stays the same.
5777dd252788645e940eada959bdde927426e2531c9Paul Duffin           */
5787dd252788645e940eada959bdde927426e2531c9Paul Duffin          offset++;
5797dd252788645e940eada959bdde927426e2531c9Paul Duffin          if (offset >= toSplit.length()) {
5807dd252788645e940eada959bdde927426e2531c9Paul Duffin            offset = -1;
5817dd252788645e940eada959bdde927426e2531c9Paul Duffin          }
5827dd252788645e940eada959bdde927426e2531c9Paul Duffin          continue;
5837dd252788645e940eada959bdde927426e2531c9Paul Duffin        }
584bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
585bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        while (start < end && trimmer.matches(toSplit.charAt(start))) {
586bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          start++;
587bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
588bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        while (end > start && trimmer.matches(toSplit.charAt(end - 1))) {
589bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          end--;
590bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
591bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
592bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (omitEmptyStrings && start == end) {
5937dd252788645e940eada959bdde927426e2531c9Paul Duffin          // Don't include the (unused) separator in next split string.
5947dd252788645e940eada959bdde927426e2531c9Paul Duffin          nextStart = offset;
595bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          continue;
596bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
597bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
5981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        if (limit == 1) {
5991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          // The limit has been reached, return the rest of the string as the
6001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          // final item.  This is tested after empty string removal so that
6011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          // empty strings do not count towards the limit.
6021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          end = toSplit.length();
6031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          offset = -1;
6041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          // Since we may have changed the end, we need to trim it again.
6051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          while (end > start && trimmer.matches(toSplit.charAt(end - 1))) {
6061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert            end--;
6071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          }
6081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        } else {
6091d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          limit--;
6101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
6111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
612bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return toSplit.subSequence(start, end).toString();
613bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
614bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return endOfData();
615bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
616bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
617bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor}
618