Matcher.java revision 4f5f16ccda77bc0448b42e4dc36da50e0c100591
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27package java.util.regex; 28 29import libcore.util.NativeAllocationRegistry; 30 31/** 32 * An engine that performs match operations on a {@link java.lang.CharSequence 33 * </code>character sequence<code>} by interpreting a {@link Pattern}. 34 * 35 * <p> A matcher is created from a pattern by invoking the pattern's {@link 36 * Pattern#matcher matcher} method. Once created, a matcher can be used to 37 * perform three different kinds of match operations: 38 * 39 * <ul> 40 * 41 * <li><p> The {@link #matches matches} method attempts to match the entire 42 * input sequence against the pattern. 
</p></li> 43 * 44 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 45 * input sequence, starting at the beginning, against the pattern. </p></li> 46 * 47 * <li><p> The {@link #find find} method scans the input sequence looking for 48 * the next subsequence that matches the pattern. </p></li> 49 * 50 * </ul> 51 * 52 * <p> Each of these methods returns a boolean indicating success or failure. 53 * More information about a successful match can be obtained by querying the 54 * state of the matcher. 55 * 56 * <p> A matcher finds matches in a subset of its input called the 57 * <i>region</i>. By default, the region contains all of the matcher's input. 58 * The region can be modified via the {@link #region region} method and queried 59 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} 60 * methods. The way that the region boundaries interact with some pattern 61 * constructs can be changed. See {@link #useAnchoringBounds 62 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} 63 * for more details. 64 * 65 * <p> This class also defines methods for replacing matched subsequences with 66 * new strings whose contents can, if desired, be computed from the match 67 * result. The {@link #appendReplacement appendReplacement} and {@link 68 * #appendTail appendTail} methods can be used in tandem in order to collect 69 * the result into an existing string buffer, or the more convenient {@link 70 * #replaceAll replaceAll} method can be used to create a string in which every 71 * matching subsequence in the input sequence is replaced. 72 * 73 * <p> The explicit state of a matcher includes the start and end indices of 74 * the most recent successful match. It also includes the start and end 75 * indices of the input subsequence captured by each <a 76 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total 77 * count of such subsequences. 
As a convenience, methods are also provided for 78 * returning these captured subsequences in string form. 79 * 80 * <p> The explicit state of a matcher is initially undefined; attempting to 81 * query any part of it before a successful match will cause an {@link 82 * IllegalStateException} to be thrown. The explicit state of a matcher is 83 * recomputed by every match operation. 84 * 85 * <p> The implicit state of a matcher includes the input character sequence as 86 * well as the <i>append position</i>, which is initially zero and is updated 87 * by the {@link #appendReplacement appendReplacement} method. 88 * 89 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 90 * method or, if a new input sequence is desired, its {@link 91 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 92 * matcher discards its explicit state information and sets the append position 93 * to zero. 94 * 95 * <p> Instances of this class are not safe for use by multiple concurrent 96 * threads. </p> 97 * 98 * 99 * @author Mike McCloskey 100 * @author Mark Reinhold 101 * @author JSR-51 Expert Group 102 * @since 1.4 103 * @spec JSR-51 104 */ 105 106public final class Matcher implements MatchResult { 107 /** 108 * The Pattern object that created this Matcher. 109 */ 110 private Pattern pattern; 111 112 /** 113 * The address of the native peer. 114 * Uses of this must be manually synchronized to avoid native crashes. 115 */ 116 private long address; 117 118 /** 119 * If non-null, a Runnable that can be used to explicitly deallocate address. 120 */ 121 private Runnable nativeFinalizer; 122 123 private static final NativeAllocationRegistry registry = new NativeAllocationRegistry( 124 Matcher.class.getClassLoader(), getNativeFinalizer(), nativeSize()); 125 126 /** 127 * Holds the original CharSequence for use in {@link #reset}. {@link #input} is used during 128 * matching. 
Note that CharSequence is mutable while String is not, so reset can cause the input 129 * to match to change. 130 */ 131 private CharSequence originalInput; 132 133 /** 134 * Holds the input text. 135 */ 136 private String input; 137 138 /** 139 * Holds the start of the region, or 0 if the matching should start at the 140 * beginning of the text. 141 */ 142 private int regionStart; 143 144 /** 145 * Holds the end of the region, or input.length() if the matching should 146 * go until the end of the input. 147 */ 148 private int regionEnd; 149 150 /** 151 * Holds the position where the next append operation will take place. 152 */ 153 private int appendPos; 154 155 /** 156 * Reflects whether a match has been found during the most recent find 157 * operation. 158 */ 159 private boolean matchFound; 160 161 /** 162 * Holds the offsets for the most recent match. 163 */ 164 private int[] matchOffsets; 165 166 /** 167 * Reflects whether the bounds of the region are anchoring. 168 */ 169 private boolean anchoringBounds = true; 170 171 /** 172 * Reflects whether the bounds of the region are transparent. 173 */ 174 private boolean transparentBounds; 175 176 /** 177 * All matchers have the state used by Pattern during a match. 178 */ 179 Matcher(Pattern parent, CharSequence text) { 180 usePattern(parent); 181 reset(text); 182 } 183 184 /** 185 * Returns the pattern that is interpreted by this matcher. 186 * 187 * @return The pattern for which this matcher was created 188 */ 189 public Pattern pattern() { 190 return pattern; 191 } 192 193 /** 194 * Returns the match state of this matcher as a {@link MatchResult}. 195 * The result is unaffected by subsequent operations performed upon this 196 * matcher. 
197 * 198 * @return a <code>MatchResult</code> with the state of this matcher 199 * @since 1.5 200 */ 201 public MatchResult toMatchResult() { 202 ensureMatch(); 203 return new OffsetBasedMatchResult(input, matchOffsets); 204 } 205 206 /** 207 * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to 208 * find matches with. 209 * 210 * <p> This method causes this matcher to lose information 211 * about the groups of the last match that occurred. The 212 * matcher's position in the input is maintained and its 213 * last append position is unaffected.</p> 214 * 215 * @param newPattern 216 * The new pattern used by this matcher 217 * @return This matcher 218 * @throws IllegalArgumentException 219 * If newPattern is <tt>null</tt> 220 * @since 1.5 221 */ 222 public Matcher usePattern(Pattern newPattern) { 223 if (newPattern == null) { 224 throw new IllegalArgumentException("newPattern == null"); 225 } 226 227 this.pattern = newPattern; 228 229 synchronized (this) { 230 if (nativeFinalizer != null) { 231 nativeFinalizer.run(); 232 address = 0; // In case openImpl throws. 233 nativeFinalizer = null; 234 } 235 address = openImpl(pattern.address); 236 nativeFinalizer = registry.registerNativeAllocation(this, address); 237 } 238 239 if (input != null) { 240 resetForInput(); 241 } 242 243 matchOffsets = new int[(groupCount() + 1) * 2]; 244 matchFound = false; 245 return this; 246 } 247 248 /** 249 * Returns the offset after the last character matched. </p> 250 * 251 * @return The offset after the last character matched 252 * 253 * @throws IllegalStateException 254 * If no match has yet been attempted, 255 * or if the previous match operation failed 256 */ 257 public int end() { 258 return end(0); 259 } 260 261 /** 262 * Returns the offset after the last character of the subsequence 263 * captured by the given group during the previous match operation. 264 * 265 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 266 * to right, starting at one. 
Group zero denotes the entire pattern, so 267 * the expression <i>m.</i><tt>end(0)</tt> is equivalent to 268 * <i>m.</i><tt>end()</tt>. </p> 269 * 270 * @param group 271 * The index of a capturing group in this matcher's pattern 272 * 273 * @return The offset after the last character captured by the group, 274 * or <tt>-1</tt> if the match was successful 275 * but the group itself did not match anything 276 * 277 * @throws IllegalStateException 278 * If no match has yet been attempted, 279 * or if the previous match operation failed 280 * 281 * @throws IndexOutOfBoundsException 282 * If there is no capturing group in the pattern 283 * with the given index 284 */ 285 public int end(int group) { 286 ensureMatch(); 287 return matchOffsets[(group * 2) + 1]; 288 } 289 290 /** 291 * Returns the offset after the last character of the subsequence 292 * captured by the given <a href="Pattern.html#groupname">named-capturing 293 * group</a> during the previous match operation. 294 * 295 * @param name 296 * The name of a named-capturing group in this matcher's pattern 297 * 298 * @return The offset after the last character captured by the group, 299 * or {@code -1} if the match was successful 300 * but the group itself did not match anything 301 * 302 * @throws IllegalStateException 303 * If no match has yet been attempted, 304 * or if the previous match operation failed 305 * 306 * @throws IllegalArgumentException 307 * If there is no capturing group in the pattern 308 * with the given name 309 * @since 1.8 310 */ 311 public int end(String name) { 312 ensureMatch(); 313 return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2 + 1]; 314 } 315 316 317 /** 318 * Returns the input subsequence matched by the previous match. 319 * 320 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 321 * the expressions <i>m.</i><tt>group()</tt> and 322 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt> 323 * are equivalent. 
</p> 324 * 325 * <p> Note that some patterns, for example <tt>a*</tt>, match the empty 326 * string. This method will return the empty string when the pattern 327 * successfully matches the empty string in the input. </p> 328 * 329 * @return The (possibly empty) subsequence matched by the previous match, 330 * in string form 331 * 332 * @throws IllegalStateException 333 * If no match has yet been attempted, 334 * or if the previous match operation failed 335 */ 336 public String group() { 337 return group(0); 338 } 339 340 /** 341 * Returns the input subsequence captured by the given group during the 342 * previous match operation. 343 * 344 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 345 * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and 346 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt> 347 * are equivalent. </p> 348 * 349 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 350 * to right, starting at one. Group zero denotes the entire pattern, so 351 * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>. 352 * </p> 353 * 354 * <p> If the match was successful but the group specified failed to match 355 * any part of the input sequence, then <tt>null</tt> is returned. Note 356 * that some groups, for example <tt>(a*)</tt>, match the empty string. 357 * This method will return the empty string when such a group successfully 358 * matches the empty string in the input. 
</p> 359 * 360 * @param group 361 * The index of a capturing group in this matcher's pattern 362 * 363 * @return The (possibly empty) subsequence captured by the group 364 * during the previous match, or <tt>null</tt> if the group 365 * failed to match part of the input 366 * 367 * @throws IllegalStateException 368 * If no match has yet been attempted, 369 * or if the previous match operation failed 370 * 371 * @throws IndexOutOfBoundsException 372 * If there is no capturing group in the pattern 373 * with the given index 374 */ 375 public String group(int group) { 376 ensureMatch(); 377 int from = matchOffsets[group * 2]; 378 int to = matchOffsets[(group * 2) + 1]; 379 if (from == -1 || to == -1) { 380 return null; 381 } else { 382 return input.substring(from, to); 383 } 384 } 385 386 /** 387 * Returns the input subsequence captured by the given 388 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous 389 * match operation. 390 * 391 * <p> If the match was successful but the group specified failed to match 392 * any part of the input sequence, then <tt>null</tt> is returned. Note 393 * that some groups, for example <tt>(a*)</tt>, match the empty string. 394 * This method will return the empty string when such a group successfully 395 * matches the empty string in the input. 
</p> 396 * 397 * @param name 398 * The name of a named-capturing group in this matcher's pattern 399 * 400 * @return The (possibly empty) subsequence captured by the named group 401 * during the previous match, or <tt>null</tt> if the group 402 * failed to match part of the input 403 * 404 * @throws IllegalStateException 405 * If no match has yet been attempted, 406 * or if the previous match operation failed 407 * 408 * @throws IllegalArgumentException 409 * If there is no capturing group in the pattern 410 * with the given name 411 * @since 1.7 412 */ 413 public String group(String name) { 414 ensureMatch(); 415 int group = getMatchedGroupIndex(pattern.address, name); 416 int from = matchOffsets[group * 2]; 417 int to = matchOffsets[(group * 2) + 1]; 418 if (from == -1 || to == -1) { 419 return null; 420 } else { 421 return input.substring(from, to); 422 } 423 } 424 425 /** 426 * Returns the number of capturing groups in this matcher's pattern. 427 * 428 * <p> Group zero denotes the entire pattern by convention. It is not 429 * included in this count. 430 * 431 * <p> Any non-negative integer smaller than or equal to the value 432 * returned by this method is guaranteed to be a valid group index for 433 * this matcher. </p> 434 * 435 * @return The number of capturing groups in this matcher's pattern 436 */ 437 public int groupCount() { 438 synchronized (this) { 439 return groupCountImpl(address); 440 } 441 } 442 443 /** 444 * Attempts to match the entire region against the pattern. 445 * 446 * <p> If the match succeeds then more information can be obtained via the 447 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. 
</p> 448 * 449 * @return <tt>true</tt> if, and only if, the entire region sequence 450 * matches this matcher's pattern 451 */ 452 public boolean matches() { 453 synchronized (this) { 454 matchFound = matchesImpl(address, matchOffsets); 455 } 456 return matchFound; 457 } 458 459 /** 460 * Attempts to find the next subsequence of the input sequence that matches 461 * the pattern. 462 * 463 * <p> This method starts at the beginning of this matcher's region, or, if 464 * a previous invocation of the method was successful and the matcher has 465 * not since been reset, at the first character not matched by the previous 466 * match. 467 * 468 * <p> If the match succeeds then more information can be obtained via the 469 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 470 * 471 * @return <tt>true</tt> if, and only if, a subsequence of the input 472 * sequence matches this matcher's pattern 473 */ 474 public boolean find() { 475 synchronized (this) { 476 matchFound = findNextImpl(address, matchOffsets); 477 } 478 return matchFound; 479 } 480 481 /** 482 * Resets this matcher and then attempts to find the next subsequence of 483 * the input sequence that matches the pattern, starting at the specified 484 * index. 485 * 486 * <p> If the match succeeds then more information can be obtained via the 487 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent 488 * invocations of the {@link #find()} method will start at the first 489 * character not matched by this match. </p> 490 * 491 * @throws IndexOutOfBoundsException 492 * If start is less than zero or if start is greater than the 493 * length of the input sequence. 
494 * 495 * @return <tt>true</tt> if, and only if, a subsequence of the input 496 * sequence starting at the given index matches this matcher's 497 * pattern 498 */ 499 public boolean find(int start) { 500 reset(); 501 if (start < 0 || start > input.length()) { 502 throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length()); 503 } 504 505 synchronized (this) { 506 matchFound = findImpl(address, start, matchOffsets); 507 } 508 return matchFound; 509 } 510 511 /** 512 * Attempts to match the input sequence, starting at the beginning of the 513 * region, against the pattern. 514 * 515 * <p> Like the {@link #matches matches} method, this method always starts 516 * at the beginning of the region; unlike that method, it does not 517 * require that the entire region be matched. 518 * 519 * <p> If the match succeeds then more information can be obtained via the 520 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 521 * 522 * @return <tt>true</tt> if, and only if, a prefix of the input 523 * sequence matches this matcher's pattern 524 */ 525 public boolean lookingAt() { 526 synchronized (this) { 527 matchFound = lookingAtImpl(address, matchOffsets); 528 } 529 return matchFound; 530 } 531 532 /** 533 * Returns a literal replacement <code>String</code> for the specified 534 * <code>String</code>. 535 * 536 * This method produces a <code>String</code> that will work 537 * as a literal replacement <code>s</code> in the 538 * <code>appendReplacement</code> method of the {@link Matcher} class. 539 * The <code>String</code> produced will match the sequence of characters 540 * in <code>s</code> treated as a literal sequence. Slashes ('\') and 541 * dollar signs ('$') will be given no special meaning. 
542 * 543 * @param s The string to be literalized 544 * @return A literal string replacement 545 * @since 1.5 546 */ 547 public static String quoteReplacement(String s) { 548 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 549 return s; 550 StringBuilder sb = new StringBuilder(); 551 for (int i=0; i<s.length(); i++) { 552 char c = s.charAt(i); 553 if (c == '\\' || c == '$') { 554 sb.append('\\'); 555 } 556 sb.append(c); 557 } 558 return sb.toString(); 559 } 560 561 /** 562 * Implements a non-terminal append-and-replace step. 563 * 564 * <p> This method performs the following actions: </p> 565 * 566 * <ol> 567 * 568 * <li><p> It reads characters from the input sequence, starting at the 569 * append position, and appends them to the given string buffer. It 570 * stops after reading the last character preceding the previous match, 571 * that is, the character at index {@link 572 * #start()} <tt>-</tt> <tt>1</tt>. </p></li> 573 * 574 * <li><p> It appends the given replacement string to the string buffer. 575 * </p></li> 576 * 577 * <li><p> It sets the append position of this matcher to the index of 578 * the last character matched, plus one, that is, to {@link #end()}. 579 * </p></li> 580 * 581 * </ol> 582 * 583 * <p> The replacement string may contain references to subsequences 584 * captured during the previous match: Each occurrence of 585 * <tt>$</tt><i>g</i> will be replaced by the result of evaluating the corresponding 586 * {@link #group(int) group(g)</tt>} respectively. For <tt>$</tt><i>g</i><tt></tt>, 587 * the first number after the <tt>$</tt> is always treated as part of 588 * the group reference. Subsequent numbers are incorporated into g if 589 * they would form a legal group reference. Only the numerals '0' 590 * through '9' are considered as potential components of the group 591 * reference. 
If the second group matched the string <tt>"foo"</tt>, for 592 * example, then passing the replacement string <tt>"$2bar"</tt> would 593 * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar 594 * sign (<tt>$</tt>) may be included as a literal in the replacement 595 * string by preceding it with a backslash (<tt>\$</tt>). 596 * 597 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 598 * the replacement string may cause the results to be different than if it 599 * were being treated as a literal replacement string. Dollar signs may be 600 * treated as references to captured subsequences as described above, and 601 * backslashes are used to escape literal characters in the replacement 602 * string. 603 * 604 * <p> This method is intended to be used in a loop together with the 605 * {@link #appendTail appendTail} and {@link #find find} methods. The 606 * following code, for example, writes <tt>one dog two dogs in the 607 * yard</tt> to the standard-output stream: </p> 608 * 609 * <blockquote><pre> 610 * Pattern p = Pattern.compile("cat"); 611 * Matcher m = p.matcher("one cat two cats in the yard"); 612 * StringBuffer sb = new StringBuffer(); 613 * while (m.find()) { 614 * m.appendReplacement(sb, "dog"); 615 * } 616 * m.appendTail(sb); 617 * System.out.println(sb.toString());</pre></blockquote> 618 * 619 * @param sb 620 * The target string buffer 621 * 622 * @param replacement 623 * The replacement string 624 * 625 * @return This matcher 626 * 627 * @throws IllegalStateException 628 * If no match has yet been attempted, 629 * or if the previous match operation failed 630 * 631 * @throws IllegalArgumentException 632 * If the replacement string refers to a named-capturing 633 * group that does not exist in the pattern 634 * 635 * @throws IndexOutOfBoundsException 636 * If the replacement string refers to a capturing group 637 * that does not exist in the pattern 638 */ 639 public Matcher appendReplacement(StringBuffer sb, String 
replacement) { 640 sb.append(input.substring(appendPos, start())); 641 appendEvaluated(sb, replacement); 642 appendPos = end(); 643 644 return this; 645 } 646 647 /** 648 * Internal helper method to append a given string to a given string buffer. 649 * If the string contains any references to groups, these are replaced by 650 * the corresponding group's contents. 651 * 652 * @param buffer the string buffer. 653 * @param s the string to append. 654 */ 655 private void appendEvaluated(StringBuffer buffer, String s) { 656 boolean escape = false; 657 boolean dollar = false; 658 boolean escapeNamedGroup = false; 659 int escapeNamedGroupStart = -1; 660 661 for (int i = 0; i < s.length(); i++) { 662 char c = s.charAt(i); 663 if (c == '\\' && !escape) { 664 escape = true; 665 } else if (c == '$' && !escape) { 666 dollar = true; 667 } else if (c >= '0' && c <= '9' && dollar) { 668 buffer.append(group(c - '0')); 669 dollar = false; 670 } else if (c == '{' && dollar) { 671 escapeNamedGroup = true; 672 escapeNamedGroupStart = i; 673 } else if (c == '}' && dollar && escapeNamedGroup) { 674 String namedGroupName = 675 s.substring(escapeNamedGroupStart + 1, i); 676 buffer.append(group(namedGroupName)); 677 dollar = false; 678 escapeNamedGroup = false; 679 } else if (c != '}' && dollar && escapeNamedGroup) { 680 continue; 681 } else { 682 buffer.append(c); 683 dollar = false; 684 escape = false; 685 escapeNamedGroup = false; 686 } 687 } 688 689 if (escapeNamedGroup) { 690 throw new IllegalArgumentException("Missing ending brace '}' from replacement string"); 691 } 692 693 if (escape) { 694 throw new ArrayIndexOutOfBoundsException(s.length()); 695 } 696 } 697 698 699 /** 700 * Implements a terminal append-and-replace step. 701 * 702 * <p> This method reads characters from the input sequence, starting at 703 * the append position, and appends them to the given string buffer. 
It is 704 * intended to be invoked after one or more invocations of the {@link 705 * #appendReplacement appendReplacement} method in order to copy the 706 * remainder of the input sequence. </p> 707 * 708 * @param sb 709 * The target string buffer 710 * 711 * @return The target string buffer 712 */ 713 public StringBuffer appendTail(StringBuffer sb) { 714 if (appendPos < regionEnd) { 715 sb.append(input.substring(appendPos, regionEnd)); 716 } 717 return sb; 718 } 719 720 /** 721 * Replaces every subsequence of the input sequence that matches the 722 * pattern with the given replacement string. 723 * 724 * <p> This method first resets this matcher. It then scans the input 725 * sequence looking for matches of the pattern. Characters that are not 726 * part of any match are appended directly to the result string; each match 727 * is replaced in the result by the replacement string. The replacement 728 * string may contain references to captured subsequences as in the {@link 729 * #appendReplacement appendReplacement} method. 730 * 731 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 732 * the replacement string may cause the results to be different than if it 733 * were being treated as a literal replacement string. Dollar signs may be 734 * treated as references to captured subsequences as described above, and 735 * backslashes are used to escape literal characters in the replacement 736 * string. 737 * 738 * <p> Given the regular expression <tt>a*b</tt>, the input 739 * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string 740 * <tt>"-"</tt>, an invocation of this method on a matcher for that 741 * expression would yield the string <tt>"-foo-foo-foo-"</tt>. 742 * 743 * <p> Invoking this method changes this matcher's state. If the matcher 744 * is to be used in further matching operations then it should first be 745 * reset. 
</p> 746 * 747 * @param replacement 748 * The replacement string 749 * 750 * @return The string constructed by replacing each matching subsequence 751 * by the replacement string, substituting captured subsequences 752 * as needed 753 */ 754 public String replaceAll(String replacement) { 755 reset(); 756 StringBuffer buffer = new StringBuffer(input.length()); 757 while (find()) { 758 appendReplacement(buffer, replacement); 759 } 760 return appendTail(buffer).toString(); 761 } 762 763 /** 764 * Replaces the first subsequence of the input sequence that matches the 765 * pattern with the given replacement string. 766 * 767 * <p> This method first resets this matcher. It then scans the input 768 * sequence looking for a match of the pattern. Characters that are not 769 * part of the match are appended directly to the result string; the match 770 * is replaced in the result by the replacement string. The replacement 771 * string may contain references to captured subsequences as in the {@link 772 * #appendReplacement appendReplacement} method. 773 * 774 * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 775 * the replacement string may cause the results to be different than if it 776 * were being treated as a literal replacement string. Dollar signs may be 777 * treated as references to captured subsequences as described above, and 778 * backslashes are used to escape literal characters in the replacement 779 * string. 780 * 781 * <p> Given the regular expression <tt>dog</tt>, the input 782 * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string 783 * <tt>"cat"</tt>, an invocation of this method on a matcher for that 784 * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p> 785 * 786 * <p> Invoking this method changes this matcher's state. If the matcher 787 * is to be used in further matching operations then it should first be 788 * reset. 
</p> 789 * 790 * @param replacement 791 * The replacement string 792 * @return The string constructed by replacing the first matching 793 * subsequence by the replacement string, substituting captured 794 * subsequences as needed 795 */ 796 public String replaceFirst(String replacement) { 797 reset(); 798 StringBuffer buffer = new StringBuffer(input.length()); 799 if (find()) { 800 appendReplacement(buffer, replacement); 801 } 802 return appendTail(buffer).toString(); 803 } 804 805 /** 806 * Sets the limits of this matcher's region. The region is the part of the 807 * input sequence that will be searched to find a match. Invoking this 808 * method resets the matcher, and then sets the region to start at the 809 * index specified by the <code>start</code> parameter and end at the 810 * index specified by the <code>end</code> parameter. 811 * 812 * <p>Depending on the transparency and anchoring being used (see 813 * {@link #useTransparentBounds useTransparentBounds} and 814 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such 815 * as anchors may behave differently at or around the boundaries of the 816 * region. 817 * 818 * @param start 819 * The index to start searching at (inclusive) 820 * @param end 821 * The index to end searching at (exclusive) 822 * @throws IndexOutOfBoundsException 823 * If start or end is less than zero, if 824 * start is greater than the length of the input sequence, if 825 * end is greater than the length of the input sequence, or if 826 * start is greater than end. 827 * @return this matcher 828 * @since 1.5 829 */ 830 public Matcher region(int start, int end) { 831 return reset(originalInput, start, end); 832 } 833 834 /** 835 * Reports the start index of this matcher's region. The 836 * searches this matcher conducts are limited to finding matches 837 * within {@link #regionStart regionStart} (inclusive) and 838 * {@link #regionEnd regionEnd} (exclusive). 
839 * 840 * @return The starting point of this matcher's region 841 * @since 1.5 842 */ 843 public int regionStart() { 844 return regionStart; 845 } 846 847 /** 848 * Reports the end index (exclusive) of this matcher's region. 849 * The searches this matcher conducts are limited to finding matches 850 * within {@link #regionStart regionStart} (inclusive) and 851 * {@link #regionEnd regionEnd} (exclusive). 852 * 853 * @return the ending point of this matcher's region 854 * @since 1.5 855 */ 856 public int regionEnd() { 857 return regionEnd; 858 } 859 860 /** 861 * Queries the transparency of region bounds for this matcher. 862 * 863 * <p> This method returns <tt>true</tt> if this matcher uses 864 * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i> 865 * bounds. 866 * 867 * <p> See {@link #useTransparentBounds useTransparentBounds} for a 868 * description of transparent and opaque bounds. 869 * 870 * <p> By default, a matcher uses opaque region boundaries. 871 * 872 * @return <tt>true</tt> iff this matcher is using transparent bounds, 873 * <tt>false</tt> otherwise. 874 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 875 * @since 1.5 876 */ 877 public boolean hasTransparentBounds() { 878 return transparentBounds; 879 } 880 881 /** 882 * Sets the transparency of region bounds for this matcher. 883 * 884 * <p> Invoking this method with an argument of <tt>true</tt> will set this 885 * matcher to use <i>transparent</i> bounds. If the boolean 886 * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used. 887 * 888 * <p> Using transparent bounds, the boundaries of this 889 * matcher's region are transparent to lookahead, lookbehind, 890 * and boundary matching constructs. Those constructs can see beyond the 891 * boundaries of the region to see if a match is appropriate. 
892 * 893 * <p> Using opaque bounds, the boundaries of this matcher's 894 * region are opaque to lookahead, lookbehind, and boundary matching 895 * constructs that may try to see beyond them. Those constructs cannot 896 * look past the boundaries so they will fail to match anything outside 897 * of the region. 898 * 899 * <p> By default, a matcher uses opaque bounds. 900 * 901 * @param value a boolean indicating whether to use opaque or transparent 902 * regions 903 * @return this matcher 904 * @see java.util.regex.Matcher#hasTransparentBounds 905 * @since 1.5 906 */ 907 public Matcher useTransparentBounds(boolean value) { 908 synchronized (this) { 909 transparentBounds = value; 910 useTransparentBoundsImpl(address, value); 911 } 912 return this; 913 } 914 915 /** 916 * Queries the anchoring of region bounds for this matcher. 917 * 918 * <p> This method returns <tt>true</tt> if this matcher uses 919 * <i>anchoring</i> bounds, <tt>false</tt> otherwise. 920 * 921 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a 922 * description of anchoring bounds. 923 * 924 * <p> By default, a matcher uses anchoring region boundaries. 925 * 926 * @return <tt>true</tt> iff this matcher is using anchoring bounds, 927 * <tt>false</tt> otherwise. 928 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 929 * @since 1.5 930 */ 931 public boolean hasAnchoringBounds() { 932 return anchoringBounds; 933 } 934 935 /** 936 * Sets the anchoring of region bounds for this matcher. 937 * 938 * <p> Invoking this method with an argument of <tt>true</tt> will set this 939 * matcher to use <i>anchoring</i> bounds. If the boolean 940 * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be 941 * used. 942 * 943 * <p> Using anchoring bounds, the boundaries of this 944 * matcher's region match anchors such as ^ and $. 945 * 946 * <p> Without anchoring bounds, the boundaries of this 947 * matcher's region will not match anchors such as ^ and $. 
948 * 949 * <p> By default, a matcher uses anchoring region boundaries. 950 * 951 * @param value a boolean indicating whether or not to use anchoring bounds. 952 * @return this matcher 953 * @see java.util.regex.Matcher#hasAnchoringBounds 954 * @since 1.5 955 */ 956 public Matcher useAnchoringBounds(boolean value) { 957 synchronized (this) { 958 anchoringBounds = value; 959 useAnchoringBoundsImpl(address, value); 960 } 961 return this; 962 } 963 964 /** 965 * <p>Returns the string representation of this matcher. The 966 * string representation of a <code>Matcher</code> contains information 967 * that may be useful for debugging. The exact format is unspecified. 968 * 969 * @return The string representation of this matcher 970 * @since 1.5 971 */ 972 public String toString() { 973 StringBuilder sb = new StringBuilder(); 974 sb.append("java.util.regex.Matcher"); 975 sb.append("[pattern=" + pattern()); 976 sb.append(" region="); 977 sb.append(regionStart() + "," + regionEnd()); 978 sb.append(" lastmatch="); 979 if (matchFound && (group() != null)) { 980 sb.append(group()); 981 } 982 sb.append("]"); 983 return sb.toString(); 984 } 985 986 /** 987 * <p>Returns true if the end of input was hit by the search engine in 988 * the last match operation performed by this matcher. 989 * 990 * <p>When this method returns true, then it is possible that more input 991 * would have changed the result of the last search. 992 * 993 * @return true iff the end of input was hit in the last match; false 994 * otherwise 995 * @since 1.5 996 */ 997 public boolean hitEnd() { 998 synchronized (this) { 999 return hitEndImpl(address); 1000 } 1001 } 1002 1003 1004 /** 1005 * <p>Returns true if more input could change a positive match into a 1006 * negative one. 1007 * 1008 * <p>If this method returns true, and a match was found, then more 1009 * input could cause the match to be lost. 
If this method returns false 1010 * and a match was found, then more input might change the match but the 1011 * match won't be lost. If a match was not found, then requireEnd has no 1012 * meaning. 1013 * 1014 * @return true iff more input could change a positive match into a 1015 * negative one. 1016 * @since 1.5 1017 */ 1018 public boolean requireEnd() { 1019 synchronized (this) { 1020 return requireEndImpl(address); 1021 } 1022 } 1023 1024 /** 1025 * Resets this matcher. 1026 * 1027 * <p> Resetting a matcher discards all of its explicit state information 1028 * and sets its append position to zero. The matcher's region is set to the 1029 * default region, which is its entire character sequence. The anchoring 1030 * and transparency of this matcher's region boundaries are unaffected. 1031 * 1032 * @return This matcher 1033 */ 1034 public Matcher reset() { 1035 return reset(originalInput, 0, originalInput.length()); 1036 } 1037 1038 /** 1039 * Resets this matcher with a new input sequence. 1040 * 1041 * <p> Resetting a matcher discards all of its explicit state information 1042 * and sets its append position to zero. The matcher's region is set to 1043 * the default region, which is its entire character sequence. The 1044 * anchoring and transparency of this matcher's region boundaries are 1045 * unaffected. 1046 * 1047 * @param input 1048 * The new input character sequence 1049 * 1050 * @return This matcher 1051 */ 1052 public Matcher reset(CharSequence input) { 1053 return reset(input, 0, input.length()); 1054 } 1055 1056 /** 1057 * Resets the Matcher. A new input sequence and a new region can be 1058 * specified. Results of a previous find get lost. The next attempt to find 1059 * an occurrence of the Pattern in the string will start at the beginning of 1060 * the region. This is the internal version of reset() to which the several 1061 * public versions delegate. 1062 * 1063 * @param input 1064 * the input sequence. 
1065 * @param start 1066 * the start of the region. 1067 * @param end 1068 * the end of the region. 1069 * 1070 * @return the matcher itself. 1071 */ 1072 private Matcher reset(CharSequence input, int start, int end) { 1073 if (input == null) { 1074 throw new IllegalArgumentException("input == null"); 1075 } 1076 1077 if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) { 1078 throw new IndexOutOfBoundsException(); 1079 } 1080 1081 this.originalInput = input; 1082 this.input = input.toString(); 1083 this.regionStart = start; 1084 this.regionEnd = end; 1085 resetForInput(); 1086 1087 matchFound = false; 1088 appendPos = 0; 1089 1090 return this; 1091 } 1092 1093 private void resetForInput() { 1094 synchronized (this) { 1095 setInputImpl(address, input, regionStart, regionEnd); 1096 useAnchoringBoundsImpl(address, anchoringBounds); 1097 useTransparentBoundsImpl(address, transparentBounds); 1098 } 1099 } 1100 1101 /** 1102 * Makes sure that a successful match has been made. Is invoked internally 1103 * from various places in the class. 1104 * 1105 * @throws IllegalStateException 1106 * if no successful match has been made. 1107 */ 1108 private void ensureMatch() { 1109 if (!matchFound) { 1110 throw new IllegalStateException("No successful match so far"); 1111 } 1112 } 1113 1114 /** 1115 * Returns the start index of the previous match. </p> 1116 * 1117 * @return The index of the first character matched 1118 * 1119 * @throws IllegalStateException 1120 * If no match has yet been attempted, 1121 * or if the previous match operation failed 1122 */ 1123 public int start() { 1124 return start(0); 1125 } 1126 1127 /** 1128 * Returns the start index of the subsequence captured by the given group 1129 * during the previous match operation. 1130 * 1131 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 1132 * to right, starting at one. 
Group zero denotes the entire pattern, so 1133 * the expression <i>m.</i><tt>start(0)</tt> is equivalent to 1134 * <i>m.</i><tt>start()</tt>. </p> 1135 * 1136 * @param group 1137 * The index of a capturing group in this matcher's pattern 1138 * 1139 * @return The index of the first character captured by the group, 1140 * or <tt>-1</tt> if the match was successful but the group 1141 * itself did not match anything 1142 * 1143 * @throws IllegalStateException 1144 * If no match has yet been attempted, 1145 * or if the previous match operation failed 1146 * 1147 * @throws IndexOutOfBoundsException 1148 * If there is no capturing group in the pattern 1149 * with the given index 1150 */ 1151 public int start(int group) throws IllegalStateException { 1152 ensureMatch(); 1153 return matchOffsets[group * 2]; 1154 } 1155 1156 1157 /** 1158 * Returns the start index of the subsequence captured by the given 1159 * <a href="Pattern.html#groupname">named-capturing group</a> during the 1160 * previous match operation. 
1161 * 1162 * @param name 1163 * The name of a named-capturing group in this matcher's pattern 1164 * 1165 * @return The index of the first character captured by the group, 1166 * or {@code -1} if the match was successful but the group 1167 * itself did not match anything 1168 * 1169 * @throws IllegalStateException 1170 * If no match has yet been attempted, 1171 * or if the previous match operation failed 1172 * 1173 * @throws IllegalArgumentException 1174 * If there is no capturing group in the pattern 1175 * with the given name 1176 * @since 1.8 1177 */ 1178 public int start(String name) { 1179 ensureMatch(); 1180 return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2]; 1181 } 1182 1183 private static int getMatchedGroupIndex(long patternAddr, String name) { 1184 int result = getMatchedGroupIndex0(patternAddr, name); 1185 if (result < 0) { 1186 throw new IllegalArgumentException("No capturing group in the pattern " + 1187 "with the name " + name); 1188 } 1189 return result; 1190 } 1191 1192 private static native int getMatchedGroupIndex0(long patternAddr, String name); 1193 private static native boolean findImpl(long addr, int startIndex, int[] offsets); 1194 private static native boolean findNextImpl(long addr, int[] offsets); 1195 private static native long getNativeFinalizer(); 1196 private static native int groupCountImpl(long addr); 1197 private static native boolean hitEndImpl(long addr); 1198 private static native boolean lookingAtImpl(long addr, int[] offsets); 1199 private static native boolean matchesImpl(long addr, int[] offsets); 1200 private static native int nativeSize(); 1201 private static native long openImpl(long patternAddr); 1202 private static native boolean requireEndImpl(long addr); 1203 private static native void setInputImpl(long addr, String s, int start, int end); 1204 private static native void useAnchoringBoundsImpl(long addr, boolean value); 1205 private static native void useTransparentBoundsImpl(long addr, boolean 
value);

    /**
     * A trivial match result implementation that's based on an array of integers
     * representing match offsets. The array is of the form
     * {@code { start1, end1, start2, end2 ....}} where each consecutive pair of elements
     * represents the start and end of a match respectively.
     *
     * <p>The offsets array is copied on construction, so later changes to the
     * caller's array do not affect this result.
     */
    static final class OffsetBasedMatchResult implements MatchResult {
        private final String input;
        private final int[] offsets;

        OffsetBasedMatchResult(String input, int[] offsets) {
            this.input = input;
            this.offsets = offsets.clone();
        }

        @Override
        public int start() {
            return start(0);
        }

        /**
         * @throws IndexOutOfBoundsException if there is no offset pair for {@code group}
         */
        @Override
        public int start(int group) {
            return offsetAt(group, 0);
        }

        @Override
        public int end() {
            return end(0);
        }

        /**
         * @throws IndexOutOfBoundsException if there is no offset pair for {@code group}
         */
        @Override
        public int end(int group) {
            return offsetAt(group, 1);
        }

        @Override
        public String group() {
            return group(0);
        }

        @Override
        public String group(int group) {
            final int start = start(group);
            final int end = end(group);
            // A stored offset of -1 yields a null group rather than a substring.
            if (start == -1 || end == -1) {
                return null;
            }

            return input.substring(start, end);
        }

        @Override
        public int groupCount() {
            // One pair per group; the pair for group zero (the whole match) is not counted.
            return (offsets.length / 2) - 1;
        }

        /**
         * Reads one element of a group's offset pair: {@code slot} 0 is the
         * start, 1 is the end. The array index is computed in {@code long}
         * arithmetic because {@code 2 * group} wraps around in {@code int}
         * arithmetic (for example {@code Integer.MIN_VALUE} would alias
         * group zero instead of being rejected).
         */
        private int offsetAt(int group, int slot) {
            final long index = 2L * group + slot;
            if (index < 0 || index >= offsets.length) {
                // Same exception type an out-of-range array access would raise.
                throw new ArrayIndexOutOfBoundsException("No group " + group);
            }
            return offsets[(int) index];
        }
    }
}