/*
 * Copyright (C) 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.mail.lib.base;

import static com.android.mail.lib.base.Preconditions.checkArgument;
import static com.android.mail.lib.base.Preconditions.checkNotNull;
import static com.android.mail.lib.base.Preconditions.checkState;

import com.google.common.base.Joiner;

import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * An object that divides strings (or other instances of {@code CharSequence})
 * into substrings, by recognizing a <i>separator</i> (a.k.a. "delimiter")
 * which can be expressed as a single character, literal string, regular
 * expression, {@code CharMatcher}, or by using a fixed substring length. This
 * class provides the complementary functionality to {@link Joiner}.
 *
 * <p>Here is the most basic example of {@code Splitter} usage: <pre>   {@code
 *
 *   Splitter.on(',').split("foo,bar")}</pre>
 *
 * This invocation returns an {@code Iterable<String>} containing {@code "foo"}
 * and {@code "bar"}, in that order.
 *
 * <p>By default {@code Splitter}'s behavior is very simplistic: <pre>   {@code
 *
 *   Splitter.on(',').split("foo,,bar,  quux")}</pre>
 *
 * This returns an iterable containing {@code ["foo", "", "bar", "  quux"]}.
 * Notice that the splitter does not assume that you want empty strings removed,
 * or that you wish to trim whitespace. If you want features like these, simply
 * ask for them: <pre> {@code
 *
 *   private static final Splitter MY_SPLITTER = Splitter.on(',')
 *       .trimResults()
 *       .omitEmptyStrings();}</pre>
 *
 * Now {@code MY_SPLITTER.split("foo, ,bar,  quux,")} returns an iterable
 * containing just {@code ["foo", "bar", "quux"]}. Note that the order in which
 * the configuration methods are called is never significant; for instance,
 * trimming is always applied first before checking for an empty result,
 * regardless of the order in which the {@link #trimResults()} and
 * {@link #omitEmptyStrings()} methods were invoked.
 *
 * <p><b>Warning: splitter instances are always immutable</b>; a configuration
 * method such as {@code omitEmptyStrings} has no effect on the instance it
 * is invoked on! You must store and use the new splitter instance returned by
 * the method. This makes splitters thread-safe, and safe to store as {@code
 * static final} constants (as illustrated above). <pre>   {@code
 *
 *   // Bad! Do not do this!
 *   Splitter splitter = Splitter.on('/');
 *   splitter.trimResults(); // does nothing!
 *   return splitter.split("wrong / wrong / wrong");}</pre>
 *
 * The separator recognized by the splitter does not have to be a single
 * literal character as in the examples above. See the methods {@link
 * #on(String)}, {@link #on(Pattern)} and {@link #on(CharMatcher)} for examples
 * of other ways to specify separators.
 *
 * <p><b>Note:</b> this class does not mimic any of the quirky behaviors of
 * similar JDK methods; for instance, it does not silently discard trailing
 * separators, as does {@link String#split(String)}, nor does it have a default
 * behavior of using five particular whitespace characters as separators, like
 * {@link StringTokenizer}.
 *
 * @author Julien Silland
 * @author Jesse Wilson
 * @author Kevin Bourrillion
 * @since 2009.09.15 <b>tentative</b>
 */
public final class Splitter {
  private final CharMatcher trimmer;
  private final boolean omitEmptyStrings;
  private final Strategy strategy;

  /** Creates a splitter with default settings: no trimming, empty strings kept. */
  private Splitter(Strategy strategy) {
    this(strategy, false, CharMatcher.NONE);
  }

  private Splitter(Strategy strategy, boolean omitEmptyStrings,
      CharMatcher trimmer) {
    this.strategy = strategy;
    this.omitEmptyStrings = omitEmptyStrings;
    this.trimmer = trimmer;
  }

  /**
   * Returns a splitter that uses the given single-character separator. For
   * example, {@code Splitter.on(',').split("foo,,bar")} returns an iterable
   * containing {@code ["foo", "", "bar"]}.
   *
   * @param separator the character to recognize as a separator
   * @return a splitter, with default settings, that recognizes that separator
   */
  public static Splitter on(char separator) {
    return on(CharMatcher.is(separator));
  }

  /**
   * Returns a splitter that considers any single character matched by the
   * given {@code CharMatcher} to be a separator. For example, {@code
   * Splitter.on(CharMatcher.anyOf(";,")).split("foo,;bar,quux")} returns an
   * iterable containing {@code ["foo", "", "bar", "quux"]}.
   *
   * @param separatorMatcher a {@link CharMatcher} that determines whether a
   *     character is a separator
   * @return a splitter, with default settings, that uses this matcher
   */
  public static Splitter on(final CharMatcher separatorMatcher) {
    checkNotNull(separatorMatcher);

    return new Splitter(new Strategy() {
      /*@Override*/ public SplittingIterator iterator(
          Splitter splitter, final CharSequence toSplit) {
        return new SplittingIterator(splitter, toSplit) {
          @Override int separatorStart(int start) {
            return separatorMatcher.indexIn(toSplit, start);
          }

          @Override int separatorEnd(int separatorPosition) {
            // A CharMatcher separator is always exactly one character wide.
            return separatorPosition + 1;
          }
        };
      }
    });
  }

  /**
   * Returns a splitter that uses the given fixed string as a separator. For
   * example, {@code Splitter.on(", ").split("foo, bar, baz,qux")} returns an
   * iterable containing {@code ["foo", "bar", "baz,qux"]}.
   *
   * @param separator the literal, nonempty string to recognize as a separator
   * @return a splitter, with default settings, that recognizes that separator
   * @throws IllegalArgumentException if {@code separator} is the empty string
   */
  public static Splitter on(final String separator) {
    checkArgument(separator.length() != 0,
        "The separator may not be the empty string.");

    return new Splitter(new Strategy() {
      /*@Override*/ public SplittingIterator iterator(
          Splitter splitter, CharSequence toSplit) {
        return new SplittingIterator(splitter, toSplit) {
          @Override public int separatorStart(int start) {
            int delimiterLength = separator.length();

            // Naive substring search: try every candidate position that
            // still leaves room for the whole separator.
            positions:
            for (int p = start, last = toSplit.length() - delimiterLength;
                p <= last; p++) {
              for (int i = 0; i < delimiterLength; i++) {
                if (toSplit.charAt(i + p) != separator.charAt(i)) {
                  continue positions;
                }
              }
              return p;
            }
            return -1;
          }

          @Override public int separatorEnd(int separatorPosition) {
            return separatorPosition + separator.length();
          }
        };
      }
    });
  }

  /**
   * Returns a splitter that considers any subsequence matching {@code
   * pattern} to be a separator. For example, {@code
   * Splitter.on(Pattern.compile("\r?\n")).split(entireFile)} splits a string
   * into lines whether it uses DOS-style or UNIX-style line terminators.
   *
   * @param separatorPattern the pattern that determines whether a subsequence
   *     is a separator. This pattern may not match the empty string.
   * @return a splitter, with default settings, that uses this pattern
   * @throws IllegalArgumentException if {@code separatorPattern} matches the
   *     empty string
   */
  public static Splitter on(final Pattern separatorPattern) {
    checkNotNull(separatorPattern);
    checkArgument(!separatorPattern.matcher("").matches(),
        "The pattern may not match the empty string: %s", separatorPattern);

    return new Splitter(new Strategy() {
      /*@Override*/ public SplittingIterator iterator(
          final Splitter splitter, CharSequence toSplit) {
        final Matcher matcher = separatorPattern.matcher(toSplit);
        return new SplittingIterator(splitter, toSplit) {
          @Override public int separatorStart(int start) {
            return matcher.find(start) ? matcher.start() : -1;
          }

          @Override public int separatorEnd(int separatorPosition) {
            // Relies on the matcher state left by the preceding
            // separatorStart call; the argument itself is not needed.
            return matcher.end();
          }
        };
      }
    });
  }

  /**
   * Returns a splitter that considers any subsequence matching a given
   * pattern (regular expression) to be a separator. For example, {@code
   * Splitter.onPattern("\r?\n").split(entireFile)} splits a string into lines
   * whether it uses DOS-style or UNIX-style line terminators. This is
   * equivalent to {@code Splitter.on(Pattern.compile(pattern))}.
   *
   * @param separatorPattern the pattern that determines whether a subsequence
   *     is a separator. This pattern may not match the empty string.
   * @return a splitter, with default settings, that uses this pattern
   * @throws PatternSyntaxException if {@code separatorPattern} is a malformed
   *     expression
   * @throws IllegalArgumentException if {@code separatorPattern} matches the
   *     empty string
   */
  public static Splitter onPattern(String separatorPattern) {
    return on(Pattern.compile(separatorPattern));
  }

  /**
   * Returns a splitter that divides strings into pieces of the given length.
   * For example, {@code Splitter.fixedLength(2).split("abcde")} returns an
   * iterable containing {@code ["ab", "cd", "e"]}. The last piece can be
   * smaller than {@code length}; it is empty only when the input itself is
   * empty.
   *
   * @param length the desired length of pieces after splitting
   * @return a splitter, with default settings, that can split into fixed sized
   *     pieces
   * @throws IllegalArgumentException if {@code length} is less than 1
   */
  public static Splitter fixedLength(final int length) {
    checkArgument(length > 0, "The length may not be less than 1");

    return new Splitter(new Strategy() {
      /*@Override*/ public SplittingIterator iterator(
          final Splitter splitter, CharSequence toSplit) {
        return new SplittingIterator(splitter, toSplit) {
          @Override public int separatorStart(int start) {
            int nextChunkStart = start + length;
            return (nextChunkStart < toSplit.length() ? nextChunkStart : -1);
          }

          @Override public int separatorEnd(int separatorPosition) {
            // The "separator" between chunks is zero characters wide.
            return separatorPosition;
          }
        };
      }
    });
  }

  /**
   * Returns a splitter that behaves equivalently to {@code this} splitter, but
   * automatically omits empty strings from the results. For example, {@code
   * Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an
   * iterable containing only {@code ["a", "b", "c"]}.
   *
   * <p>If either {@code trimResults} option is also specified when creating a
   * splitter, that splitter always trims results first before checking for
   * emptiness. So, for example, {@code
   * Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns
   * an empty iterable.
   *
   * <p>Note that it is ordinarily not possible for {@link #split(CharSequence)}
   * to return an empty iterable, but when using this option, it can (if the
   * input sequence consists of nothing but separators).
   *
   * @return a splitter with the desired configuration
   */
  public Splitter omitEmptyStrings() {
    return new Splitter(strategy, true, trimmer);
  }

  /**
   * Returns a splitter that behaves equivalently to {@code this} splitter, but
   * automatically removes leading and trailing {@linkplain
   * CharMatcher#WHITESPACE whitespace} from each returned substring; equivalent
   * to {@code trimResults(CharMatcher.WHITESPACE)}. For example, {@code
   * Splitter.on(',').trimResults().split(" a, b  ,c  ")} returns an iterable
   * containing {@code ["a", "b", "c"]}.
   *
   * @return a splitter with the desired configuration
   */
  public Splitter trimResults() {
    return trimResults(CharMatcher.WHITESPACE);
  }

  /**
   * Returns a splitter that behaves equivalently to {@code this} splitter, but
   * removes all leading or trailing characters matching the given {@code
   * CharMatcher} from each returned substring. For example, {@code
   * Splitter.on(',').trimResults(CharMatcher.is('_')).split("_a ,_b_ ,c__")}
   * returns an iterable containing {@code ["a ", "b_ ", "c"]}.
   *
   * @param trimmer a {@link CharMatcher} that determines whether a character
   *     should be removed from the beginning/end of a subsequence
   * @return a splitter with the desired configuration
   */
  public Splitter trimResults(CharMatcher trimmer) {
    checkNotNull(trimmer);
    return new Splitter(strategy, omitEmptyStrings, trimmer);
  }

  /**
   * Splits the given {@link CharSequence} into substrings according to this
   * splitter's configuration. Each call to {@code iterator()} on the returned
   * iterable starts a fresh pass over {@code sequence}.
   *
   * @param sequence the sequence of characters to split
   * @return an iteration over the segments split from the parameter.
   */
  public Iterable<String> split(final CharSequence sequence) {
    checkNotNull(sequence);

    return new Iterable<String>() {
      /*@Override*/ public Iterator<String> iterator() {
        return strategy.iterator(Splitter.this, sequence);
      }
    };
  }

  /** Creates the iterator that locates separators for one kind of splitter. */
  private interface Strategy {
    Iterator<String> iterator(Splitter splitter, CharSequence toSplit);
  }

  /**
   * Iterator skeleton shared by all strategies: subclasses only say where the
   * next separator starts and ends; this class cuts, trims and filters the
   * substrings in between.
   */
  private abstract static class SplittingIterator
      extends AbstractIterator<String> {
    final CharSequence toSplit;
    final CharMatcher trimmer;
    final boolean omitEmptyStrings;

    /**
     * Returns the first index in {@code toSplit} at or after {@code start}
     * that contains the separator, or {@code -1} if there is no further
     * separator.
     */
    abstract int separatorStart(int start);

    /**
     * Returns the first index in {@code toSplit} after {@code
     * separatorPosition} that does not contain a separator. This method is only
     * invoked after a call to {@code separatorStart}.
     */
    abstract int separatorEnd(int separatorPosition);

    /**
     * Index at which the search for the next separator resumes, or {@code -1}
     * once the input has been exhausted.
     */
    int offset = 0;

    protected SplittingIterator(Splitter splitter, CharSequence toSplit) {
      this.trimmer = splitter.trimmer;
      this.omitEmptyStrings = splitter.omitEmptyStrings;
      this.toSplit = toSplit;
    }

    @Override protected String computeNext() {
      // Start of the next substring to return. Kept separate from offset so
      // that the separator search can move forward without consuming input.
      int nextStart = offset;
      while (offset != -1) {
        int start = nextStart;
        int end;

        int separatorPosition = separatorStart(offset);
        if (separatorPosition == -1) {
          end = toSplit.length();
          offset = -1;
        } else {
          end = separatorPosition;
          offset = separatorEnd(separatorPosition);
        }

        if (offset == nextStart) {
          /*
           * The separator matched zero characters right at the start of the
           * pending substring. This can happen with a pattern that does not
           * match the empty string as a whole but still produces empty
           * matches (e.g. a lookahead). Without this step the search would
           * find the same empty match forever. Move the search one character
           * forward while leaving the substring start untouched.
           */
          offset++;
          if (offset > toSplit.length()) {
            offset = -1;
          }
          continue;
        }

        while (start < end && trimmer.matches(toSplit.charAt(start))) {
          start++;
        }
        while (end > start && trimmer.matches(toSplit.charAt(end - 1))) {
          end--;
        }

        if (omitEmptyStrings && start == end) {
          // Skip the empty piece; the next piece begins after its separator.
          nextStart = offset;
          continue;
        }

        return toSplit.subSequence(start, end).toString();
      }
      return endOfData();
    }
  }

  /*
   * Copied from common.collect.AbstractIterator. TODO: un-fork once these
   * packages have been combined into a single library.
   */
  private static abstract class AbstractIterator<T> implements Iterator<T> {
    State state = State.NOT_READY;

    enum State {
      READY, NOT_READY, DONE, FAILED,
    }

    T next;

    /**
     * Computes the next element, or calls {@link #endOfData()} and returns its
     * result when there are no more elements.
     */
    protected abstract T computeNext();

    /** Marks the iteration as finished; the return value is a placeholder. */
    protected final T endOfData() {
      state = State.DONE;
      return null;
    }

    public final boolean hasNext() {
      checkState(state != State.FAILED);
      switch (state) {
        case DONE:
          return false;
        case READY:
          return true;
        default:
      }
      return tryToComputeNext();
    }

    boolean tryToComputeNext() {
      // If computeNext() throws, the state stays FAILED and later calls to
      // hasNext() are rejected by the checkState above.
      state = State.FAILED; // temporary pessimism
      next = computeNext();
      if (state != State.DONE) {
        state = State.READY;
        return true;
      }
      return false;
    }

    public final T next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      state = State.NOT_READY;
      return next;
    }

    /*@Override*/ public void remove() {
      throw new UnsupportedOperationException();
    }
  }
}