10888a09821a98ac0680fad765217302858e70fa4Paul Duffin/* 20888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Copyright (C) 2009 The Guava Authors 30888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 40888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Licensed under the Apache License, Version 2.0 (the "License"); 50888a09821a98ac0680fad765217302858e70fa4Paul Duffin * you may not use this file except in compliance with the License. 60888a09821a98ac0680fad765217302858e70fa4Paul Duffin * You may obtain a copy of the License at 70888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 80888a09821a98ac0680fad765217302858e70fa4Paul Duffin * http://www.apache.org/licenses/LICENSE-2.0 90888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 100888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Unless required by applicable law or agreed to in writing, software 110888a09821a98ac0680fad765217302858e70fa4Paul Duffin * distributed under the License is distributed on an "AS IS" BASIS, 120888a09821a98ac0680fad765217302858e70fa4Paul Duffin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 130888a09821a98ac0680fad765217302858e70fa4Paul Duffin * See the License for the specific language governing permissions and 140888a09821a98ac0680fad765217302858e70fa4Paul Duffin * limitations under the License. 
150888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 160888a09821a98ac0680fad765217302858e70fa4Paul Duffin 170888a09821a98ac0680fad765217302858e70fa4Paul Duffinpackage com.google.common.base; 180888a09821a98ac0680fad765217302858e70fa4Paul Duffin 190888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport static com.google.common.base.Preconditions.checkArgument; 200888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport static com.google.common.base.Preconditions.checkNotNull; 210888a09821a98ac0680fad765217302858e70fa4Paul Duffin 220888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport com.google.common.annotations.Beta; 230888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport com.google.common.annotations.GwtCompatible; 240888a09821a98ac0680fad765217302858e70fa4Paul Duffin 250888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.ArrayList; 260888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.Collections; 270888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.Iterator; 280888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.LinkedHashMap; 290888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.List; 300888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.Map; 310888a09821a98ac0680fad765217302858e70fa4Paul Duffin 320888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport javax.annotation.CheckReturnValue; 330888a09821a98ac0680fad765217302858e70fa4Paul Duffin 340888a09821a98ac0680fad765217302858e70fa4Paul Duffin/** 350888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Extracts non-overlapping substrings from an input string, typically by 360888a09821a98ac0680fad765217302858e70fa4Paul Duffin * recognizing appearances of a <i>separator</i> sequence. 
This separator can be 370888a09821a98ac0680fad765217302858e70fa4Paul Duffin * specified as a single {@linkplain #on(char) character}, fixed {@linkplain 380888a09821a98ac0680fad765217302858e70fa4Paul Duffin * #on(String) string}, {@linkplain #onPattern regular expression} or {@link 390888a09821a98ac0680fad765217302858e70fa4Paul Duffin * #on(CharMatcher) CharMatcher} instance. Or, instead of using a separator at 400888a09821a98ac0680fad765217302858e70fa4Paul Duffin * all, a splitter can extract adjacent substrings of a given {@linkplain 410888a09821a98ac0680fad765217302858e70fa4Paul Duffin * #fixedLength fixed length}. 420888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 430888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>For example, this expression: <pre> {@code 440888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 450888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(',').split("foo,bar,qux")}</pre> 460888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 470888a09821a98ac0680fad765217302858e70fa4Paul Duffin * ... produces an {@code Iterable} containing {@code "foo"}, {@code "bar"} and 480888a09821a98ac0680fad765217302858e70fa4Paul Duffin * {@code "qux"}, in that order. 490888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 500888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>By default, {@code Splitter}'s behavior is simplistic and unassuming. The 510888a09821a98ac0680fad765217302858e70fa4Paul Duffin * following expression: <pre> {@code 520888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 530888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(',').split(" foo,,, bar ,")}</pre> 540888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 550888a09821a98ac0680fad765217302858e70fa4Paul Duffin * ... yields the substrings {@code [" foo", "", "", " bar ", ""]}. 
If this 560888a09821a98ac0680fad765217302858e70fa4Paul Duffin * is not the desired behavior, use configuration methods to obtain a <i>new</i> 570888a09821a98ac0680fad765217302858e70fa4Paul Duffin * splitter instance with modified behavior: <pre> {@code 580888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 590888a09821a98ac0680fad765217302858e70fa4Paul Duffin * private static final Splitter MY_SPLITTER = Splitter.on(',') 600888a09821a98ac0680fad765217302858e70fa4Paul Duffin * .trimResults() 610888a09821a98ac0680fad765217302858e70fa4Paul Duffin * .omitEmptyStrings();}</pre> 620888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 630888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>Now {@code MY_SPLITTER.split("foo,,, bar ,")} returns just {@code ["foo", 640888a09821a98ac0680fad765217302858e70fa4Paul Duffin * "bar"]}. Note that the order in which these configuration methods are called 650888a09821a98ac0680fad765217302858e70fa4Paul Duffin * is never significant. 660888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 670888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p><b>Warning:</b> Splitter instances are immutable. Invoking a configuration 680888a09821a98ac0680fad765217302858e70fa4Paul Duffin * method has no effect on the receiving instance; you must store and use the 690888a09821a98ac0680fad765217302858e70fa4Paul Duffin * new splitter instance it returns instead. <pre> {@code 700888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 710888a09821a98ac0680fad765217302858e70fa4Paul Duffin * // Do NOT do this 720888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter splitter = Splitter.on('/'); 730888a09821a98ac0680fad765217302858e70fa4Paul Duffin * splitter.trimResults(); // does nothing! 
740888a09821a98ac0680fad765217302858e70fa4Paul Duffin * return splitter.split("wrong / wrong / wrong");}</pre> 750888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 760888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>For separator-based splitters that do not use {@code omitEmptyStrings}, an 770888a09821a98ac0680fad765217302858e70fa4Paul Duffin * input string containing {@code n} occurrences of the separator naturally 780888a09821a98ac0680fad765217302858e70fa4Paul Duffin * yields an iterable of size {@code n + 1}. So if the separator does not occur 790888a09821a98ac0680fad765217302858e70fa4Paul Duffin * anywhere in the input, a single substring is returned containing the entire 800888a09821a98ac0680fad765217302858e70fa4Paul Duffin * input. Consequently, all splitters split the empty string to {@code [""]} 810888a09821a98ac0680fad765217302858e70fa4Paul Duffin * (note: even fixed-length splitters). 820888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 830888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>Splitter instances are thread-safe immutable, and are therefore safe to 840888a09821a98ac0680fad765217302858e70fa4Paul Duffin * store as {@code static final} constants. 850888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 860888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>The {@link Joiner} class provides the inverse operation to splitting, but 870888a09821a98ac0680fad765217302858e70fa4Paul Duffin * note that a round-trip between the two should be assumed to be lossy. 880888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 890888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>See the Guava User Guide article on <a href= 900888a09821a98ac0680fad765217302858e70fa4Paul Duffin * "http://code.google.com/p/guava-libraries/wiki/StringsExplained#Splitter"> 910888a09821a98ac0680fad765217302858e70fa4Paul Duffin * {@code Splitter}</a>. 
920888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 930888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Julien Silland 940888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Jesse Wilson 950888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Kevin Bourrillion 960888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Louis Wasserman 970888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @since 1.0 980888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 990888a09821a98ac0680fad765217302858e70fa4Paul Duffin@GwtCompatible(emulated = true) 1000888a09821a98ac0680fad765217302858e70fa4Paul Duffinpublic final class Splitter { 1010888a09821a98ac0680fad765217302858e70fa4Paul Duffin private final CharMatcher trimmer; 1020888a09821a98ac0680fad765217302858e70fa4Paul Duffin private final boolean omitEmptyStrings; 1030888a09821a98ac0680fad765217302858e70fa4Paul Duffin private final Strategy strategy; 1040888a09821a98ac0680fad765217302858e70fa4Paul Duffin private final int limit; 1050888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1060888a09821a98ac0680fad765217302858e70fa4Paul Duffin private Splitter(Strategy strategy) { 1070888a09821a98ac0680fad765217302858e70fa4Paul Duffin this(strategy, false, CharMatcher.NONE, Integer.MAX_VALUE); 1080888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1090888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1100888a09821a98ac0680fad765217302858e70fa4Paul Duffin private Splitter(Strategy strategy, boolean omitEmptyStrings, 1110888a09821a98ac0680fad765217302858e70fa4Paul Duffin CharMatcher trimmer, int limit) { 1120888a09821a98ac0680fad765217302858e70fa4Paul Duffin this.strategy = strategy; 1130888a09821a98ac0680fad765217302858e70fa4Paul Duffin this.omitEmptyStrings = omitEmptyStrings; 1140888a09821a98ac0680fad765217302858e70fa4Paul Duffin this.trimmer = trimmer; 1150888a09821a98ac0680fad765217302858e70fa4Paul Duffin this.limit = limit; 1160888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 
1170888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1180888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 1190888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that uses the given single-character separator. For 1200888a09821a98ac0680fad765217302858e70fa4Paul Duffin * example, {@code Splitter.on(',').split("foo,,bar")} returns an iterable 1210888a09821a98ac0680fad765217302858e70fa4Paul Duffin * containing {@code ["foo", "", "bar"]}. 1220888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 1230888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param separator the character to recognize as a separator 1240888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter, with default settings, that recognizes that separator 1250888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 1260888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static Splitter on(char separator) { 1270888a09821a98ac0680fad765217302858e70fa4Paul Duffin return on(CharMatcher.is(separator)); 1280888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1290888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1300888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 1310888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that considers any single character matched by the 1320888a09821a98ac0680fad765217302858e70fa4Paul Duffin * given {@code CharMatcher} to be a separator. For example, {@code 1330888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(CharMatcher.anyOf(";,")).split("foo,;bar,quux")} returns an 1340888a09821a98ac0680fad765217302858e70fa4Paul Duffin * iterable containing {@code ["foo", "", "bar", "quux"]}. 
1350888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 1360888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param separatorMatcher a {@link CharMatcher} that determines whether a 1370888a09821a98ac0680fad765217302858e70fa4Paul Duffin * character is a separator 1380888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter, with default settings, that uses this matcher 1390888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 1400888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static Splitter on(final CharMatcher separatorMatcher) { 1410888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(separatorMatcher); 1420888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1430888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Splitter(new Strategy() { 1440888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public SplittingIterator iterator( 1450888a09821a98ac0680fad765217302858e70fa4Paul Duffin Splitter splitter, final CharSequence toSplit) { 1460888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new SplittingIterator(splitter, toSplit) { 1470888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override int separatorStart(int start) { 1480888a09821a98ac0680fad765217302858e70fa4Paul Duffin return separatorMatcher.indexIn(toSplit, start); 1490888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1500888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1510888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override int separatorEnd(int separatorPosition) { 1520888a09821a98ac0680fad765217302858e70fa4Paul Duffin return separatorPosition + 1; 1530888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1540888a09821a98ac0680fad765217302858e70fa4Paul Duffin }; 1550888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1560888a09821a98ac0680fad765217302858e70fa4Paul Duffin }); 1570888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1580888a09821a98ac0680fad765217302858e70fa4Paul Duffin 
1590888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 1600888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that uses the given fixed string as a separator. For 1610888a09821a98ac0680fad765217302858e70fa4Paul Duffin * example, {@code Splitter.on(", ").split("foo, bar,baz")} returns an 1620888a09821a98ac0680fad765217302858e70fa4Paul Duffin * iterable containing {@code ["foo", "bar,baz"]}. 1630888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 1640888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param separator the literal, nonempty string to recognize as a separator 1650888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter, with default settings, that recognizes that separator 1660888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 1670888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static Splitter on(final String separator) { 1680888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkArgument(separator.length() != 0, 1690888a09821a98ac0680fad765217302858e70fa4Paul Duffin "The separator may not be the empty string."); 1700888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1710888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Splitter(new Strategy() { 1720888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public SplittingIterator iterator( 1730888a09821a98ac0680fad765217302858e70fa4Paul Duffin Splitter splitter, CharSequence toSplit) { 1740888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new SplittingIterator(splitter, toSplit) { 1750888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int separatorStart(int start) { 1760888a09821a98ac0680fad765217302858e70fa4Paul Duffin int separatorLength = separator.length(); 1770888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1780888a09821a98ac0680fad765217302858e70fa4Paul Duffin positions: 1790888a09821a98ac0680fad765217302858e70fa4Paul Duffin for (int p = start, last = toSplit.length() - separatorLength; 
1800888a09821a98ac0680fad765217302858e70fa4Paul Duffin p <= last; p++) { 1810888a09821a98ac0680fad765217302858e70fa4Paul Duffin for (int i = 0; i < separatorLength; i++) { 1820888a09821a98ac0680fad765217302858e70fa4Paul Duffin if (toSplit.charAt(i + p) != separator.charAt(i)) { 1830888a09821a98ac0680fad765217302858e70fa4Paul Duffin continue positions; 1840888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1850888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1860888a09821a98ac0680fad765217302858e70fa4Paul Duffin return p; 1870888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1880888a09821a98ac0680fad765217302858e70fa4Paul Duffin return -1; 1890888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1900888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1910888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int separatorEnd(int separatorPosition) { 1920888a09821a98ac0680fad765217302858e70fa4Paul Duffin return separatorPosition + separator.length(); 1930888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1940888a09821a98ac0680fad765217302858e70fa4Paul Duffin }; 1950888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1960888a09821a98ac0680fad765217302858e70fa4Paul Duffin }); 1970888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 1980888a09821a98ac0680fad765217302858e70fa4Paul Duffin 1990888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 2000888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that divides strings into pieces of the given length. 2010888a09821a98ac0680fad765217302858e70fa4Paul Duffin * For example, {@code Splitter.fixedLength(2).split("abcde")} returns an 2020888a09821a98ac0680fad765217302858e70fa4Paul Duffin * iterable containing {@code ["ab", "cd", "e"]}. The last piece can be 2030888a09821a98ac0680fad765217302858e70fa4Paul Duffin * smaller than {@code length} but will never be empty. 
2040888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2050888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p><b>Exception:</b> for consistency with separator-based splitters, {@code 2060888a09821a98ac0680fad765217302858e70fa4Paul Duffin * split("")} does not yield an empty iterable, but an iterable containing 2070888a09821a98ac0680fad765217302858e70fa4Paul Duffin * {@code ""}. This is the only case in which {@code 2080888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Iterables.size(split(input))} does not equal {@code 2090888a09821a98ac0680fad765217302858e70fa4Paul Duffin * IntMath.divide(input.length(), length, CEILING)}. To avoid this behavior, 2100888a09821a98ac0680fad765217302858e70fa4Paul Duffin * use {@code omitEmptyStrings}. 2110888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2120888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param length the desired length of pieces after splitting, a positive 2130888a09821a98ac0680fad765217302858e70fa4Paul Duffin * integer 2140888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter, with default settings, that can split into fixed sized 2150888a09821a98ac0680fad765217302858e70fa4Paul Duffin * pieces 2160888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @throws IllegalArgumentException if {@code length} is zero or negative 2170888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 2180888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static Splitter fixedLength(final int length) { 2190888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkArgument(length > 0, "The length may not be less than 1"); 2200888a09821a98ac0680fad765217302858e70fa4Paul Duffin 2210888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Splitter(new Strategy() { 2220888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public SplittingIterator iterator( 2230888a09821a98ac0680fad765217302858e70fa4Paul Duffin final Splitter splitter, CharSequence toSplit) { 
2240888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new SplittingIterator(splitter, toSplit) { 2250888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int separatorStart(int start) { 2260888a09821a98ac0680fad765217302858e70fa4Paul Duffin int nextChunkStart = start + length; 2270888a09821a98ac0680fad765217302858e70fa4Paul Duffin return (nextChunkStart < toSplit.length() ? nextChunkStart : -1); 2280888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2290888a09821a98ac0680fad765217302858e70fa4Paul Duffin 2300888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int separatorEnd(int separatorPosition) { 2310888a09821a98ac0680fad765217302858e70fa4Paul Duffin return separatorPosition; 2320888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2330888a09821a98ac0680fad765217302858e70fa4Paul Duffin }; 2340888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2350888a09821a98ac0680fad765217302858e70fa4Paul Duffin }); 2360888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2370888a09821a98ac0680fad765217302858e70fa4Paul Duffin 2380888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 2390888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that behaves equivalently to {@code this} splitter, but 2400888a09821a98ac0680fad765217302858e70fa4Paul Duffin * automatically omits empty strings from the results. For example, {@code 2410888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an 2420888a09821a98ac0680fad765217302858e70fa4Paul Duffin * iterable containing only {@code ["a", "b", "c"]}. 
2430888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2440888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>If either {@code trimResults} option is also specified when creating a 2450888a09821a98ac0680fad765217302858e70fa4Paul Duffin * splitter, that splitter always trims results first before checking for 2460888a09821a98ac0680fad765217302858e70fa4Paul Duffin * emptiness. So, for example, {@code 2470888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns 2480888a09821a98ac0680fad765217302858e70fa4Paul Duffin * an empty iterable. 2490888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2500888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>Note that it is ordinarily not possible for {@link #split(CharSequence)} 2510888a09821a98ac0680fad765217302858e70fa4Paul Duffin * to return an empty iterable, but when using this option, it can (if the 2520888a09821a98ac0680fad765217302858e70fa4Paul Duffin * input sequence consists of nothing but separators). 2530888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2540888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter with the desired configuration 2550888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 2560888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 2570888a09821a98ac0680fad765217302858e70fa4Paul Duffin public Splitter omitEmptyStrings() { 2580888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Splitter(strategy, true, trimmer, limit); 2590888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2600888a09821a98ac0680fad765217302858e70fa4Paul Duffin 2610888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 2620888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that behaves equivalently to {@code this} splitter but 2630888a09821a98ac0680fad765217302858e70fa4Paul Duffin * stops splitting after it reaches the limit. 
2640888a09821a98ac0680fad765217302858e70fa4Paul Duffin * The limit defines the maximum number of items returned by the iterator. 2650888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2660888a09821a98ac0680fad765217302858e70fa4Paul Duffin * <p>For example, 2670888a09821a98ac0680fad765217302858e70fa4Paul Duffin * {@code Splitter.on(',').limit(3).split("a,b,c,d")} returns an iterable 2680888a09821a98ac0680fad765217302858e70fa4Paul Duffin * containing {@code ["a", "b", "c,d"]}. When omitting empty strings, the 2690888a09821a98ac0680fad765217302858e70fa4Paul Duffin * omitted strings do no count. Hence, 2700888a09821a98ac0680fad765217302858e70fa4Paul Duffin * {@code Splitter.on(',').limit(3).omitEmptyStrings().split("a,,,b,,,c,d")} 2710888a09821a98ac0680fad765217302858e70fa4Paul Duffin * returns an iterable containing {@code ["a", "b", "c,d"}. 2720888a09821a98ac0680fad765217302858e70fa4Paul Duffin * When trim is requested, all entries, including the last are trimmed. Hence 2730888a09821a98ac0680fad765217302858e70fa4Paul Duffin * {@code Splitter.on(',').limit(3).trimResults().split(" a , b , c , d ")} 2740888a09821a98ac0680fad765217302858e70fa4Paul Duffin * results in @{code ["a", "b", "c , d"]}. 
2750888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2760888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param limit the maximum number of items returns 2770888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter with the desired configuration 2780888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @since 9.0 2790888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 2800888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 2810888a09821a98ac0680fad765217302858e70fa4Paul Duffin public Splitter limit(int limit) { 2820888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkArgument(limit > 0, "must be greater than zero: %s", limit); 2830888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Splitter(strategy, omitEmptyStrings, trimmer, limit); 2840888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2850888a09821a98ac0680fad765217302858e70fa4Paul Duffin 2860888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 2870888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that behaves equivalently to {@code this} splitter, but 2880888a09821a98ac0680fad765217302858e70fa4Paul Duffin * automatically removes leading and trailing {@linkplain 2890888a09821a98ac0680fad765217302858e70fa4Paul Duffin * CharMatcher#WHITESPACE whitespace} from each returned substring; equivalent 2900888a09821a98ac0680fad765217302858e70fa4Paul Duffin * to {@code trimResults(CharMatcher.WHITESPACE)}. For example, {@code 2910888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(',').trimResults().split(" a, b ,c ")} returns an iterable 2920888a09821a98ac0680fad765217302858e70fa4Paul Duffin * containing {@code ["a", "b", "c"]}. 
2930888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 2940888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter with the desired configuration 2950888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 2960888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 2970888a09821a98ac0680fad765217302858e70fa4Paul Duffin public Splitter trimResults() { 2980888a09821a98ac0680fad765217302858e70fa4Paul Duffin return trimResults(CharMatcher.WHITESPACE); 2990888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3000888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3010888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 3020888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a splitter that behaves equivalently to {@code this} splitter, but 3030888a09821a98ac0680fad765217302858e70fa4Paul Duffin * removes all leading or trailing characters matching the given {@code 3040888a09821a98ac0680fad765217302858e70fa4Paul Duffin * CharMatcher} from each returned substring. For example, {@code 3050888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splitter.on(',').trimResults(CharMatcher.is('_')).split("_a ,_b_ ,c__")} 3060888a09821a98ac0680fad765217302858e70fa4Paul Duffin * returns an iterable containing {@code ["a ", "b_ ", "c"]}. 3070888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 3080888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param trimmer a {@link CharMatcher} that determines whether a character 3090888a09821a98ac0680fad765217302858e70fa4Paul Duffin * should be removed from the beginning/end of a subsequence 3100888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return a splitter with the desired configuration 3110888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 3120888a09821a98ac0680fad765217302858e70fa4Paul Duffin // TODO(kevinb): throw if a trimmer was already specified! 
3130888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 3140888a09821a98ac0680fad765217302858e70fa4Paul Duffin public Splitter trimResults(CharMatcher trimmer) { 3150888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(trimmer); 3160888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Splitter(strategy, omitEmptyStrings, trimmer, limit); 3170888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3180888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3190888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 3200888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splits {@code sequence} into string components and makes them available 3210888a09821a98ac0680fad765217302858e70fa4Paul Duffin * through an {@link Iterator}, which may be lazily evaluated. If you want 3220888a09821a98ac0680fad765217302858e70fa4Paul Duffin * an eagerly computed {@link List}, use {@link #splitToList(CharSequence)}. 3230888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 3240888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param sequence the sequence of characters to split 3250888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return an iteration over the segments split from the parameter. 
3260888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 3270888a09821a98ac0680fad765217302858e70fa4Paul Duffin public Iterable<String> split(final CharSequence sequence) { 3280888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 3290888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3300888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new Iterable<String>() { 3310888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public Iterator<String> iterator() { 3320888a09821a98ac0680fad765217302858e70fa4Paul Duffin return splittingIterator(sequence); 3330888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3340888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String toString() { 3350888a09821a98ac0680fad765217302858e70fa4Paul Duffin return Joiner.on(", ") 3360888a09821a98ac0680fad765217302858e70fa4Paul Duffin .appendTo(new StringBuilder().append('['), this) 3370888a09821a98ac0680fad765217302858e70fa4Paul Duffin .append(']') 3380888a09821a98ac0680fad765217302858e70fa4Paul Duffin .toString(); 3390888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3400888a09821a98ac0680fad765217302858e70fa4Paul Duffin }; 3410888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3420888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3430888a09821a98ac0680fad765217302858e70fa4Paul Duffin private Iterator<String> splittingIterator(CharSequence sequence) { 3440888a09821a98ac0680fad765217302858e70fa4Paul Duffin return strategy.iterator(this, sequence); 3450888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3460888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3470888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 3480888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Splits {@code sequence} into string components and returns them as 3490888a09821a98ac0680fad765217302858e70fa4Paul Duffin * an immutable list. 
If you want an {@link Iterable} which may be lazily 3500888a09821a98ac0680fad765217302858e70fa4Paul Duffin * evaluated, use {@link #split(CharSequence)}. 3510888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 3520888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @param sequence the sequence of characters to split 3530888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @return an immutable list of the segments split from the parameter 3540888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @since 15.0 3550888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 3560888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Beta 3570888a09821a98ac0680fad765217302858e70fa4Paul Duffin public List<String> splitToList(CharSequence sequence) { 3580888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 3590888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3600888a09821a98ac0680fad765217302858e70fa4Paul Duffin Iterator<String> iterator = splittingIterator(sequence); 3610888a09821a98ac0680fad765217302858e70fa4Paul Duffin List<String> result = new ArrayList<String>(); 3620888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3630888a09821a98ac0680fad765217302858e70fa4Paul Duffin while (iterator.hasNext()) { 3640888a09821a98ac0680fad765217302858e70fa4Paul Duffin result.add(iterator.next()); 3650888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3660888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3670888a09821a98ac0680fad765217302858e70fa4Paul Duffin return Collections.unmodifiableList(result); 3680888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3690888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3700888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 3710888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a {@code MapSplitter} which splits entries based on this splitter, 3720888a09821a98ac0680fad765217302858e70fa4Paul Duffin * and splits entries into keys and values using the specified separator. 
3730888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 3740888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @since 10.0 3750888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 3760888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 3770888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Beta 3780888a09821a98ac0680fad765217302858e70fa4Paul Duffin public MapSplitter withKeyValueSeparator(String separator) { 3790888a09821a98ac0680fad765217302858e70fa4Paul Duffin return withKeyValueSeparator(on(separator)); 3800888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3810888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3820888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 3830888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a {@code MapSplitter} which splits entries based on this splitter, 3840888a09821a98ac0680fad765217302858e70fa4Paul Duffin * and splits entries into keys and values using the specified separator. 3850888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 3860888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @since 14.0 3870888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 3880888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 3890888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Beta 3900888a09821a98ac0680fad765217302858e70fa4Paul Duffin public MapSplitter withKeyValueSeparator(char separator) { 3910888a09821a98ac0680fad765217302858e70fa4Paul Duffin return withKeyValueSeparator(on(separator)); 3920888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3930888a09821a98ac0680fad765217302858e70fa4Paul Duffin 3940888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 3950888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Returns a {@code MapSplitter} which splits entries based on this splitter, 3960888a09821a98ac0680fad765217302858e70fa4Paul Duffin * and splits entries into keys and values using the specified key-value 3970888a09821a98ac0680fad765217302858e70fa4Paul Duffin * splitter. 
3980888a09821a98ac0680fad765217302858e70fa4Paul Duffin * 3990888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @since 10.0 4000888a09821a98ac0680fad765217302858e70fa4Paul Duffin */ 4010888a09821a98ac0680fad765217302858e70fa4Paul Duffin @CheckReturnValue 4020888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Beta 4030888a09821a98ac0680fad765217302858e70fa4Paul Duffin public MapSplitter withKeyValueSeparator(Splitter keyValueSplitter) { 4040888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new MapSplitter(this, keyValueSplitter); 4050888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 4060888a09821a98ac0680fad765217302858e70fa4Paul Duffin 4070888a09821a98ac0680fad765217302858e70fa4Paul Duffin /** 4080888a09821a98ac0680fad765217302858e70fa4Paul Duffin * An object that splits strings into maps as {@code Splitter} splits 4090888a09821a98ac0680fad765217302858e70fa4Paul Duffin * iterables and lists. Like {@code Splitter}, it is thread-safe and 4100888a09821a98ac0680fad765217302858e70fa4Paul Duffin * immutable. 
*
   * @since 10.0
   */
  @Beta
  public static final class MapSplitter {
    /** Message template used when a chunk does not yield exactly a key and a value. */
    private static final String INVALID_ENTRY_MESSAGE =
        "Chunk [%s] is not a valid entry";
    /** Splits the whole input into entry chunks. */
    private final Splitter outerSplitter;
    /** Splits one entry chunk into its key and its value. */
    private final Splitter entrySplitter;

    private MapSplitter(Splitter outerSplitter, Splitter entrySplitter) {
      this.outerSplitter = outerSplitter; // only "this" is passed
      this.entrySplitter = checkNotNull(entrySplitter);
    }

    /**
     * Splits {@code sequence} into substrings, splits each substring into
     * an entry, and returns an unmodifiable map with each of the entries. For
     * example, <code>
     * Splitter.on(';').trimResults().withKeyValueSeparator("=>")
     * .split("a=>b ; c=>b")
     * </code> will return a mapping from {@code "a"} to {@code "b"} and
     * {@code "c"} to {@code "b"}.
     *
     * <p>The returned map preserves the order of the entries from
     * {@code sequence}.
     *
     * @param sequence the characters to split into map entries
     * @return an unmodifiable map of the parsed entries, in encounter order
     * @throws IllegalArgumentException if the specified sequence does not split
     *         into valid map entries, or if there are duplicate keys
     */
    public Map<String, String> split(CharSequence sequence) {
      Map<String, String> map = new LinkedHashMap<String, String>();
      for (String entry : outerSplitter.split(sequence)) {
        Iterator<String> entryFields = entrySplitter.splittingIterator(entry);

        // First field: the key. It must exist and must not have been seen yet.
        checkArgument(entryFields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
        String key = entryFields.next();
        checkArgument(!map.containsKey(key), "Duplicate key [%s] found.", key);

        // Second field: the value. It must exist.
        checkArgument(entryFields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
        String value = entryFields.next();
        map.put(key, value);

        // A third field means the chunk is not a plain key/value pair.
        checkArgument(!entryFields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
      }
      return Collections.unmodifiableMap(map);
    }
  }

  /** Creates the iterator that performs the actual splitting for a splitter. */
  private interface Strategy {
    /**
     * Returns an iterator over the pieces of {@code toSplit}, configured from
     * {@code splitter}.
     */
    Iterator<String> iterator(Splitter splitter, CharSequence toSplit);
  }

  /**
   * Skeleton of a splitting iterator. Subclasses only locate separators via
   * {@link #separatorStart} and {@link #separatorEnd}; trimming, omission of
   * empty strings and the limit are handled in {@link #computeNext}.
   */
  private abstract static class SplittingIterator extends AbstractIterator<String> {
    /** The input being split. */
    final CharSequence toSplit;
    /** Characters matched by this matcher are stripped from both ends of each piece. */
    final CharMatcher trimmer;
    /** Whether pieces that are empty after trimming are skipped. */
    final boolean omitEmptyStrings;

    /**
     * Returns the first index in {@code toSplit} at or after {@code start}
     * that contains the separator.
     */
    abstract int separatorStart(int start);

    /**
     * Returns the first index in {@code toSplit} after {@code
     * separatorPosition} that does not contain a separator. This method is only
     * invoked after a call to {@code separatorStart}.
     */
    abstract int separatorEnd(int separatorPosition);

    /** Where the next separator search begins; {@code -1} once the input is exhausted. */
    int offset = 0;
    /** Number of pieces that may still be returned; at 1 the rest of the input is one piece. */
    int limit;

    /** Copies the trimming, empty-string and limit settings from {@code splitter}. */
    protected SplittingIterator(Splitter splitter, CharSequence toSplit) {
      this.trimmer = splitter.trimmer;
      this.omitEmptyStrings = splitter.omitEmptyStrings;
      this.limit = splitter.limit;
      this.toSplit = toSplit;
    }

    /**
     * Computes the next piece, or signals the end of the data when the input
     * has been consumed.
     */
    @Override protected String computeNext() {
      /*
       * The returned string will be from the end of the last match to the
       * beginning of the next one. nextStart is the start position of the
       * returned substring, while offset is the place to start looking for a
       * separator.
       */
      int nextStart = offset;
      while (offset != -1) {
        int start = nextStart;
        int end;

        int separatorPosition = separatorStart(offset);
        if (separatorPosition == -1) {
          // No further separator: this piece runs to the end of the input.
          end = toSplit.length();
          offset = -1;
        } else {
          end = separatorPosition;
          offset = separatorEnd(separatorPosition);
        }
        if (offset == nextStart) {
          /*
           * This occurs when some pattern has an empty match, even if it
           * doesn't match the empty string -- for example, if it requires
           * lookahead or the like. The offset must be increased to look for
           * separators beyond this point, without changing the start position
           * of the next returned substring -- so nextStart stays the same.
           */
          offset++;
          /*
           * Give up only once the offset has moved PAST the end of the input.
           * Stopping as soon as offset == length would leave the loop without
           * ever returning the pending substring [nextStart, length), dropping
           * a trailing one-character piece. As a consequence separatorStart
           * may now be called with start == toSplit.length().
           * NOTE(review): confirm every Strategy's separatorStart accepts that.
           */
          if (offset > toSplit.length()) {
            offset = -1;
          }
          continue;
        }

        // Trim matching characters from the front, then from the back.
        while (start < end && trimmer.matches(toSplit.charAt(start))) {
          start++;
        }
        while (end > start && trimmer.matches(toSplit.charAt(end - 1))) {
          end--;
        }

        if (omitEmptyStrings && start == end) {
          // Don't include the (unused) separator in next split string.
          nextStart = offset;
          continue;
        }

        if (limit == 1) {
          // The limit has been reached, return the rest of the string as the
          // final item.  This is tested after empty string removal so that
          // empty strings do not count towards the limit.
          end = toSplit.length();
          offset = -1;
          // Since we may have changed the end, we need to trim it again.
          while (end > start && trimmer.matches(toSplit.charAt(end - 1))) {
            end--;
          }
        } else {
          limit--;
        }

        return toSplit.subSequence(start, end).toString();
      }
      return endOfData();
    }
  }
}