1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27package java.util.regex; 28 29import libcore.util.NativeAllocationRegistry; 30 31/** 32 * An engine that performs match operations on a {@link java.lang.CharSequence 33 * </code>character sequence<code>} by interpreting a {@link Pattern}. 34 * 35 * <p> A matcher is created from a pattern by invoking the pattern's {@link 36 * Pattern#matcher matcher} method. Once created, a matcher can be used to 37 * perform three different kinds of match operations: 38 * 39 * <ul> 40 * 41 * <li><p> The {@link #matches matches} method attempts to match the entire 42 * input sequence against the pattern. 
</p></li>
 *
 *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
 *   input sequence, starting at the beginning, against the pattern. </p></li>
 *
 *   <li><p> The {@link #find find} method scans the input sequence looking for
 *   the next subsequence that matches the pattern. </p></li>
 *
 * </ul>
 *
 * <p> Each of these methods returns a boolean indicating success or failure.
 * More information about a successful match can be obtained by querying the
 * state of the matcher.
 *
 * <p> A matcher finds matches in a subset of its input called the
 * <i>region</i>. By default, the region contains all of the matcher's input.
 * The region can be modified via the {@link #region region} method and queried
 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
 * methods. The way that the region boundaries interact with some pattern
 * constructs can be changed. See {@link #useAnchoringBounds
 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
 * for more details.
 *
 * <p> This class also defines methods for replacing matched subsequences with
 * new strings whose contents can, if desired, be computed from the match
 * result. The {@link #appendReplacement appendReplacement} and {@link
 * #appendTail appendTail} methods can be used in tandem in order to collect
 * the result into an existing string buffer, or the more convenient {@link
 * #replaceAll replaceAll} method can be used to create a string in which every
 * matching subsequence in the input sequence is replaced.
 *
 * <p> The explicit state of a matcher includes the start and end indices of
 * the most recent successful match. It also includes the start and end
 * indices of the input subsequence captured by each <a
 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
 * count of such subsequences.
As a convenience, methods are also provided for 78 * returning these captured subsequences in string form. 79 * 80 * <p> The explicit state of a matcher is initially undefined; attempting to 81 * query any part of it before a successful match will cause an {@link 82 * IllegalStateException} to be thrown. The explicit state of a matcher is 83 * recomputed by every match operation. 84 * 85 * <p> The implicit state of a matcher includes the input character sequence as 86 * well as the <i>append position</i>, which is initially zero and is updated 87 * by the {@link #appendReplacement appendReplacement} method. 88 * 89 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 90 * method or, if a new input sequence is desired, its {@link 91 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 92 * matcher discards its explicit state information and sets the append position 93 * to zero. 94 * 95 * <p> Instances of this class are not safe for use by multiple concurrent 96 * threads. </p> 97 * 98 * 99 * @author Mike McCloskey 100 * @author Mark Reinhold 101 * @author JSR-51 Expert Group 102 * @since 1.4 103 * @spec JSR-51 104 */ 105 106public final class Matcher implements MatchResult { 107 /** 108 * The Pattern object that created this Matcher. 109 */ 110 private Pattern pattern; 111 112 /** 113 * The address of the native peer. 114 * Uses of this must be manually synchronized to avoid native crashes. 115 */ 116 private long address; 117 118 /** 119 * If non-null, a Runnable that can be used to explicitly deallocate address. 120 */ 121 private Runnable nativeFinalizer; 122 123 private static final NativeAllocationRegistry registry = new NativeAllocationRegistry( 124 Matcher.class.getClassLoader(), getNativeFinalizer(), nativeSize()); 125 126 /** 127 * Holds the input text. 128 */ 129 private String input; 130 131 /** 132 * Holds the start of the region, or 0 if the matching should start at the 133 * beginning of the text. 
134 */ 135 private int regionStart; 136 137 /** 138 * Holds the end of the region, or input.length() if the matching should 139 * go until the end of the input. 140 */ 141 private int regionEnd; 142 143 /** 144 * Holds the position where the next append operation will take place. 145 */ 146 private int appendPos; 147 148 /** 149 * Reflects whether a match has been found during the most recent find 150 * operation. 151 */ 152 private boolean matchFound; 153 154 /** 155 * Holds the offsets for the most recent match. 156 */ 157 private int[] matchOffsets; 158 159 /** 160 * Reflects whether the bounds of the region are anchoring. 161 */ 162 private boolean anchoringBounds = true; 163 164 /** 165 * Reflects whether the bounds of the region are transparent. 166 */ 167 private boolean transparentBounds; 168 169 /** 170 * All matchers have the state used by Pattern during a match. 171 */ 172 Matcher(Pattern parent, CharSequence text) { 173 usePattern(parent); 174 reset(text); 175 } 176 177 /** 178 * Returns the pattern that is interpreted by this matcher. 179 * 180 * @return The pattern for which this matcher was created 181 */ 182 public Pattern pattern() { 183 return pattern; 184 } 185 186 /** 187 * Returns the match state of this matcher as a {@link MatchResult}. 188 * The result is unaffected by subsequent operations performed upon this 189 * matcher. 190 * 191 * @return a <code>MatchResult</code> with the state of this matcher 192 * @since 1.5 193 */ 194 public MatchResult toMatchResult() { 195 ensureMatch(); 196 return new OffsetBasedMatchResult(input, matchOffsets); 197 } 198 199 /** 200 * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to 201 * find matches with. 202 * 203 * <p> This method causes this matcher to lose information 204 * about the groups of the last match that occurred. 
The 205 * matcher's position in the input is maintained and its 206 * last append position is unaffected.</p> 207 * 208 * @param newPattern 209 * The new pattern used by this matcher 210 * @return This matcher 211 * @throws IllegalArgumentException 212 * If newPattern is <tt>null</tt> 213 * @since 1.5 214 */ 215 public Matcher usePattern(Pattern newPattern) { 216 if (newPattern == null) { 217 throw new IllegalArgumentException("newPattern == null"); 218 } 219 220 this.pattern = newPattern; 221 222 synchronized (this) { 223 if (nativeFinalizer != null) { 224 nativeFinalizer.run(); 225 address = 0; // In case openImpl throws. 226 nativeFinalizer = null; 227 } 228 address = openImpl(pattern.address); 229 nativeFinalizer = registry.registerNativeAllocation(this, address); 230 } 231 232 if (input != null) { 233 resetForInput(); 234 } 235 236 matchOffsets = new int[(groupCount() + 1) * 2]; 237 matchFound = false; 238 return this; 239 } 240 241 /** 242 * Returns the offset after the last character matched. </p> 243 * 244 * @return The offset after the last character matched 245 * 246 * @throws IllegalStateException 247 * If no match has yet been attempted, 248 * or if the previous match operation failed 249 */ 250 public int end() { 251 return end(0); 252 } 253 254 /** 255 * Returns the offset after the last character of the subsequence 256 * captured by the given group during the previous match operation. 257 * 258 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 259 * to right, starting at one. Group zero denotes the entire pattern, so 260 * the expression <i>m.</i><tt>end(0)</tt> is equivalent to 261 * <i>m.</i><tt>end()</tt>. 
</p> 262 * 263 * @param group 264 * The index of a capturing group in this matcher's pattern 265 * 266 * @return The offset after the last character captured by the group, 267 * or <tt>-1</tt> if the match was successful 268 * but the group itself did not match anything 269 * 270 * @throws IllegalStateException 271 * If no match has yet been attempted, 272 * or if the previous match operation failed 273 * 274 * @throws IndexOutOfBoundsException 275 * If there is no capturing group in the pattern 276 * with the given index 277 */ 278 public int end(int group) { 279 ensureMatch(); 280 return matchOffsets[(group * 2) + 1]; 281 } 282 283 /** 284 * Returns the offset after the last character of the subsequence 285 * captured by the given <a href="Pattern.html#groupname">named-capturing 286 * group</a> during the previous match operation. 287 * 288 * @param name 289 * The name of a named-capturing group in this matcher's pattern 290 * 291 * @return The offset after the last character captured by the group, 292 * or {@code -1} if the match was successful 293 * but the group itself did not match anything 294 * 295 * @throws IllegalStateException 296 * If no match has yet been attempted, 297 * or if the previous match operation failed 298 * 299 * @throws IllegalArgumentException 300 * If there is no capturing group in the pattern 301 * with the given name 302 * @since 1.8 303 */ 304 public int end(String name) { 305 ensureMatch(); 306 return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2 + 1]; 307 } 308 309 310 /** 311 * Returns the input subsequence matched by the previous match. 312 * 313 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 314 * the expressions <i>m.</i><tt>group()</tt> and 315 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt> 316 * are equivalent. </p> 317 * 318 * <p> Note that some patterns, for example <tt>a*</tt>, match the empty 319 * string. 
This method will return the empty string when the pattern 320 * successfully matches the empty string in the input. </p> 321 * 322 * @return The (possibly empty) subsequence matched by the previous match, 323 * in string form 324 * 325 * @throws IllegalStateException 326 * If no match has yet been attempted, 327 * or if the previous match operation failed 328 */ 329 public String group() { 330 return group(0); 331 } 332 333 /** 334 * Returns the input subsequence captured by the given group during the 335 * previous match operation. 336 * 337 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 338 * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and 339 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt> 340 * are equivalent. </p> 341 * 342 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 343 * to right, starting at one. Group zero denotes the entire pattern, so 344 * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>. 345 * </p> 346 * 347 * <p> If the match was successful but the group specified failed to match 348 * any part of the input sequence, then <tt>null</tt> is returned. Note 349 * that some groups, for example <tt>(a*)</tt>, match the empty string. 350 * This method will return the empty string when such a group successfully 351 * matches the empty string in the input. 
</p> 352 * 353 * @param group 354 * The index of a capturing group in this matcher's pattern 355 * 356 * @return The (possibly empty) subsequence captured by the group 357 * during the previous match, or <tt>null</tt> if the group 358 * failed to match part of the input 359 * 360 * @throws IllegalStateException 361 * If no match has yet been attempted, 362 * or if the previous match operation failed 363 * 364 * @throws IndexOutOfBoundsException 365 * If there is no capturing group in the pattern 366 * with the given index 367 */ 368 public String group(int group) { 369 ensureMatch(); 370 int from = matchOffsets[group * 2]; 371 int to = matchOffsets[(group * 2) + 1]; 372 if (from == -1 || to == -1) { 373 return null; 374 } else { 375 return input.substring(from, to); 376 } 377 } 378 379 /** 380 * Returns the input subsequence captured by the given 381 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous 382 * match operation. 383 * 384 * <p> If the match was successful but the group specified failed to match 385 * any part of the input sequence, then <tt>null</tt> is returned. Note 386 * that some groups, for example <tt>(a*)</tt>, match the empty string. 387 * This method will return the empty string when such a group successfully 388 * matches the empty string in the input. 
</p> 389 * 390 * @param name 391 * The name of a named-capturing group in this matcher's pattern 392 * 393 * @return The (possibly empty) subsequence captured by the named group 394 * during the previous match, or <tt>null</tt> if the group 395 * failed to match part of the input 396 * 397 * @throws IllegalStateException 398 * If no match has yet been attempted, 399 * or if the previous match operation failed 400 * 401 * @throws IllegalArgumentException 402 * If there is no capturing group in the pattern 403 * with the given name 404 * @since 1.7 405 */ 406 public String group(String name) { 407 ensureMatch(); 408 int group = getMatchedGroupIndex(pattern.address, name); 409 int from = matchOffsets[group * 2]; 410 int to = matchOffsets[(group * 2) + 1]; 411 if (from == -1 || to == -1) { 412 return null; 413 } else { 414 return input.substring(from, to); 415 } 416 } 417 418 /** 419 * Returns the number of capturing groups in this matcher's pattern. 420 * 421 * <p> Group zero denotes the entire pattern by convention. It is not 422 * included in this count. 423 * 424 * <p> Any non-negative integer smaller than or equal to the value 425 * returned by this method is guaranteed to be a valid group index for 426 * this matcher. </p> 427 * 428 * @return The number of capturing groups in this matcher's pattern 429 */ 430 public int groupCount() { 431 synchronized (this) { 432 return groupCountImpl(address); 433 } 434 } 435 436 /** 437 * Attempts to match the entire region against the pattern. 438 * 439 * <p> If the match succeeds then more information can be obtained via the 440 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. 
</p> 441 * 442 * @return <tt>true</tt> if, and only if, the entire region sequence 443 * matches this matcher's pattern 444 */ 445 public boolean matches() { 446 synchronized (this) { 447 matchFound = matchesImpl(address, matchOffsets); 448 } 449 return matchFound; 450 } 451 452 /** 453 * Attempts to find the next subsequence of the input sequence that matches 454 * the pattern. 455 * 456 * <p> This method starts at the beginning of this matcher's region, or, if 457 * a previous invocation of the method was successful and the matcher has 458 * not since been reset, at the first character not matched by the previous 459 * match. 460 * 461 * <p> If the match succeeds then more information can be obtained via the 462 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 463 * 464 * @return <tt>true</tt> if, and only if, a subsequence of the input 465 * sequence matches this matcher's pattern 466 */ 467 public boolean find() { 468 synchronized (this) { 469 matchFound = findNextImpl(address, matchOffsets); 470 } 471 return matchFound; 472 } 473 474 /** 475 * Resets this matcher and then attempts to find the next subsequence of 476 * the input sequence that matches the pattern, starting at the specified 477 * index. 478 * 479 * <p> If the match succeeds then more information can be obtained via the 480 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent 481 * invocations of the {@link #find()} method will start at the first 482 * character not matched by this match. </p> 483 * 484 * @throws IndexOutOfBoundsException 485 * If start is less than zero or if start is greater than the 486 * length of the input sequence. 
487 * 488 * @return <tt>true</tt> if, and only if, a subsequence of the input 489 * sequence starting at the given index matches this matcher's 490 * pattern 491 */ 492 public boolean find(int start) { 493 if (start < 0 || start > input.length()) { 494 throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length()); 495 } 496 497 synchronized (this) { 498 matchFound = findImpl(address, start, matchOffsets); 499 } 500 return matchFound; 501 } 502 503 /** 504 * Attempts to match the input sequence, starting at the beginning of the 505 * region, against the pattern. 506 * 507 * <p> Like the {@link #matches matches} method, this method always starts 508 * at the beginning of the region; unlike that method, it does not 509 * require that the entire region be matched. 510 * 511 * <p> If the match succeeds then more information can be obtained via the 512 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 513 * 514 * @return <tt>true</tt> if, and only if, a prefix of the input 515 * sequence matches this matcher's pattern 516 */ 517 public boolean lookingAt() { 518 synchronized (this) { 519 matchFound = lookingAtImpl(address, matchOffsets); 520 } 521 return matchFound; 522 } 523 524 /** 525 * Returns a literal replacement <code>String</code> for the specified 526 * <code>String</code>. 527 * 528 * This method produces a <code>String</code> that will work 529 * as a literal replacement <code>s</code> in the 530 * <code>appendReplacement</code> method of the {@link Matcher} class. 531 * The <code>String</code> produced will match the sequence of characters 532 * in <code>s</code> treated as a literal sequence. Slashes ('\') and 533 * dollar signs ('$') will be given no special meaning. 
534 * 535 * @param s The string to be literalized 536 * @return A literal string replacement 537 * @since 1.5 538 */ 539 public static String quoteReplacement(String s) { 540 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 541 return s; 542 StringBuilder sb = new StringBuilder(); 543 for (int i=0; i<s.length(); i++) { 544 char c = s.charAt(i); 545 if (c == '\\' || c == '$') { 546 sb.append('\\'); 547 } 548 sb.append(c); 549 } 550 return sb.toString(); 551 } 552 553 /** 554 * Implements a non-terminal append-and-replace step. 555 * 556 * <p> This method performs the following actions: </p> 557 * 558 * <ol> 559 * 560 * <li><p> It reads characters from the input sequence, starting at the 561 * append position, and appends them to the given string buffer. It 562 * stops after reading the last character preceding the previous match, 563 * that is, the character at index {@link 564 * #start()} <tt>-</tt> <tt>1</tt>. </p></li> 565 * 566 * <li><p> It appends the given replacement string to the string buffer. 567 * </p></li> 568 * 569 * <li><p> It sets the append position of this matcher to the index of 570 * the last character matched, plus one, that is, to {@link #end()}. 571 * </p></li> 572 * 573 * </ol> 574 * 575 * <p> The replacement string may contain references to subsequences 576 * captured during the previous match: Each occurrence of 577 * <tt>$</tt><i>g</i> will be replaced by the result of evaluating the corresponding 578 * {@link #group(int) group(g)</tt>} respectively. For <tt>$</tt><i>g</i><tt></tt>, 579 * the first number after the <tt>$</tt> is always treated as part of 580 * the group reference. Subsequent numbers are incorporated into g if 581 * they would form a legal group reference. Only the numerals '0' 582 * through '9' are considered as potential components of the group 583 * reference. 
If the second group matched the string <tt>"foo"</tt>, for 584 * example, then passing the replacement string <tt>"$2bar"</tt> would 585 * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar 586 * sign (<tt>$</tt>) may be included as a literal in the replacement 587 * string by preceding it with a backslash (<tt>\$</tt>). 588 * 589 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 590 * the replacement string may cause the results to be different than if it 591 * were being treated as a literal replacement string. Dollar signs may be 592 * treated as references to captured subsequences as described above, and 593 * backslashes are used to escape literal characters in the replacement 594 * string. 595 * 596 * <p> This method is intended to be used in a loop together with the 597 * {@link #appendTail appendTail} and {@link #find find} methods. The 598 * following code, for example, writes <tt>one dog two dogs in the 599 * yard</tt> to the standard-output stream: </p> 600 * 601 * <blockquote><pre> 602 * Pattern p = Pattern.compile("cat"); 603 * Matcher m = p.matcher("one cat two cats in the yard"); 604 * StringBuffer sb = new StringBuffer(); 605 * while (m.find()) { 606 * m.appendReplacement(sb, "dog"); 607 * } 608 * m.appendTail(sb); 609 * System.out.println(sb.toString());</pre></blockquote> 610 * 611 * @param sb 612 * The target string buffer 613 * 614 * @param replacement 615 * The replacement string 616 * 617 * @return This matcher 618 * 619 * @throws IllegalStateException 620 * If no match has yet been attempted, 621 * or if the previous match operation failed 622 * 623 * @throws IllegalArgumentException 624 * If the replacement string refers to a named-capturing 625 * group that does not exist in the pattern 626 * 627 * @throws IndexOutOfBoundsException 628 * If the replacement string refers to a capturing group 629 * that does not exist in the pattern 630 */ 631 public Matcher appendReplacement(StringBuffer sb, String 
replacement) { 632 sb.append(input.substring(appendPos, start())); 633 appendEvaluated(sb, replacement); 634 appendPos = end(); 635 636 return this; 637 } 638 639 /** 640 * Internal helper method to append a given string to a given string buffer. 641 * If the string contains any references to groups, these are replaced by 642 * the corresponding group's contents. 643 * 644 * @param buffer the string buffer. 645 * @param s the string to append. 646 */ 647 private void appendEvaluated(StringBuffer buffer, String s) { 648 boolean escape = false; 649 boolean dollar = false; 650 boolean escapeNamedGroup = false; 651 int escapeNamedGroupStart = -1; 652 653 for (int i = 0; i < s.length(); i++) { 654 char c = s.charAt(i); 655 if (c == '\\' && !escape) { 656 escape = true; 657 } else if (c == '$' && !escape) { 658 dollar = true; 659 } else if (c >= '0' && c <= '9' && dollar) { 660 buffer.append(group(c - '0')); 661 dollar = false; 662 } else if (c == '{' && dollar) { 663 escapeNamedGroup = true; 664 escapeNamedGroupStart = i; 665 } else if (c == '}' && dollar && escapeNamedGroup) { 666 String namedGroupName = 667 s.substring(escapeNamedGroupStart + 1, i); 668 buffer.append(group(namedGroupName)); 669 dollar = false; 670 escapeNamedGroup = false; 671 } else if (c != '}' && dollar && escapeNamedGroup) { 672 continue; 673 } else { 674 buffer.append(c); 675 dollar = false; 676 escape = false; 677 escapeNamedGroup = false; 678 } 679 } 680 681 if (escapeNamedGroup) { 682 throw new IllegalArgumentException("Missing ending brace '}' from replacement string"); 683 } 684 685 if (escape) { 686 throw new ArrayIndexOutOfBoundsException(s.length()); 687 } 688 } 689 690 691 /** 692 * Implements a terminal append-and-replace step. 693 * 694 * <p> This method reads characters from the input sequence, starting at 695 * the append position, and appends them to the given string buffer. 
It is 696 * intended to be invoked after one or more invocations of the {@link 697 * #appendReplacement appendReplacement} method in order to copy the 698 * remainder of the input sequence. </p> 699 * 700 * @param sb 701 * The target string buffer 702 * 703 * @return The target string buffer 704 */ 705 public StringBuffer appendTail(StringBuffer sb) { 706 if (appendPos < regionEnd) { 707 sb.append(input.substring(appendPos, regionEnd)); 708 } 709 return sb; 710 } 711 712 /** 713 * Replaces every subsequence of the input sequence that matches the 714 * pattern with the given replacement string. 715 * 716 * <p> This method first resets this matcher. It then scans the input 717 * sequence looking for matches of the pattern. Characters that are not 718 * part of any match are appended directly to the result string; each match 719 * is replaced in the result by the replacement string. The replacement 720 * string may contain references to captured subsequences as in the {@link 721 * #appendReplacement appendReplacement} method. 722 * 723 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 724 * the replacement string may cause the results to be different than if it 725 * were being treated as a literal replacement string. Dollar signs may be 726 * treated as references to captured subsequences as described above, and 727 * backslashes are used to escape literal characters in the replacement 728 * string. 729 * 730 * <p> Given the regular expression <tt>a*b</tt>, the input 731 * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string 732 * <tt>"-"</tt>, an invocation of this method on a matcher for that 733 * expression would yield the string <tt>"-foo-foo-foo-"</tt>. 734 * 735 * <p> Invoking this method changes this matcher's state. If the matcher 736 * is to be used in further matching operations then it should first be 737 * reset. 
</p> 738 * 739 * @param replacement 740 * The replacement string 741 * 742 * @return The string constructed by replacing each matching subsequence 743 * by the replacement string, substituting captured subsequences 744 * as needed 745 */ 746 public String replaceAll(String replacement) { 747 reset(); 748 StringBuffer buffer = new StringBuffer(input.length()); 749 while (find()) { 750 appendReplacement(buffer, replacement); 751 } 752 return appendTail(buffer).toString(); 753 } 754 755 /** 756 * Replaces the first subsequence of the input sequence that matches the 757 * pattern with the given replacement string. 758 * 759 * <p> This method first resets this matcher. It then scans the input 760 * sequence looking for a match of the pattern. Characters that are not 761 * part of the match are appended directly to the result string; the match 762 * is replaced in the result by the replacement string. The replacement 763 * string may contain references to captured subsequences as in the {@link 764 * #appendReplacement appendReplacement} method. 765 * 766 * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 767 * the replacement string may cause the results to be different than if it 768 * were being treated as a literal replacement string. Dollar signs may be 769 * treated as references to captured subsequences as described above, and 770 * backslashes are used to escape literal characters in the replacement 771 * string. 772 * 773 * <p> Given the regular expression <tt>dog</tt>, the input 774 * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string 775 * <tt>"cat"</tt>, an invocation of this method on a matcher for that 776 * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p> 777 * 778 * <p> Invoking this method changes this matcher's state. If the matcher 779 * is to be used in further matching operations then it should first be 780 * reset. 
</p> 781 * 782 * @param replacement 783 * The replacement string 784 * @return The string constructed by replacing the first matching 785 * subsequence by the replacement string, substituting captured 786 * subsequences as needed 787 */ 788 public String replaceFirst(String replacement) { 789 reset(); 790 StringBuffer buffer = new StringBuffer(input.length()); 791 if (find()) { 792 appendReplacement(buffer, replacement); 793 } 794 return appendTail(buffer).toString(); 795 } 796 797 /** 798 * Sets the limits of this matcher's region. The region is the part of the 799 * input sequence that will be searched to find a match. Invoking this 800 * method resets the matcher, and then sets the region to start at the 801 * index specified by the <code>start</code> parameter and end at the 802 * index specified by the <code>end</code> parameter. 803 * 804 * <p>Depending on the transparency and anchoring being used (see 805 * {@link #useTransparentBounds useTransparentBounds} and 806 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such 807 * as anchors may behave differently at or around the boundaries of the 808 * region. 809 * 810 * @param start 811 * The index to start searching at (inclusive) 812 * @param end 813 * The index to end searching at (exclusive) 814 * @throws IndexOutOfBoundsException 815 * If start or end is less than zero, if 816 * start is greater than the length of the input sequence, if 817 * end is greater than the length of the input sequence, or if 818 * start is greater than end. 819 * @return this matcher 820 * @since 1.5 821 */ 822 public Matcher region(int start, int end) { 823 return reset(input, start, end); 824 } 825 826 /** 827 * Reports the start index of this matcher's region. The 828 * searches this matcher conducts are limited to finding matches 829 * within {@link #regionStart regionStart} (inclusive) and 830 * {@link #regionEnd regionEnd} (exclusive). 
831 * 832 * @return The starting point of this matcher's region 833 * @since 1.5 834 */ 835 public int regionStart() { 836 return regionStart; 837 } 838 839 /** 840 * Reports the end index (exclusive) of this matcher's region. 841 * The searches this matcher conducts are limited to finding matches 842 * within {@link #regionStart regionStart} (inclusive) and 843 * {@link #regionEnd regionEnd} (exclusive). 844 * 845 * @return the ending point of this matcher's region 846 * @since 1.5 847 */ 848 public int regionEnd() { 849 return regionEnd; 850 } 851 852 /** 853 * Queries the transparency of region bounds for this matcher. 854 * 855 * <p> This method returns <tt>true</tt> if this matcher uses 856 * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i> 857 * bounds. 858 * 859 * <p> See {@link #useTransparentBounds useTransparentBounds} for a 860 * description of transparent and opaque bounds. 861 * 862 * <p> By default, a matcher uses opaque region boundaries. 863 * 864 * @return <tt>true</tt> iff this matcher is using transparent bounds, 865 * <tt>false</tt> otherwise. 866 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 867 * @since 1.5 868 */ 869 public boolean hasTransparentBounds() { 870 return transparentBounds; 871 } 872 873 /** 874 * Sets the transparency of region bounds for this matcher. 875 * 876 * <p> Invoking this method with an argument of <tt>true</tt> will set this 877 * matcher to use <i>transparent</i> bounds. If the boolean 878 * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used. 879 * 880 * <p> Using transparent bounds, the boundaries of this 881 * matcher's region are transparent to lookahead, lookbehind, 882 * and boundary matching constructs. Those constructs can see beyond the 883 * boundaries of the region to see if a match is appropriate. 
884 * 885 * <p> Using opaque bounds, the boundaries of this matcher's 886 * region are opaque to lookahead, lookbehind, and boundary matching 887 * constructs that may try to see beyond them. Those constructs cannot 888 * look past the boundaries so they will fail to match anything outside 889 * of the region. 890 * 891 * <p> By default, a matcher uses opaque bounds. 892 * 893 * @param value a boolean indicating whether to use opaque or transparent 894 * regions 895 * @return this matcher 896 * @see java.util.regex.Matcher#hasTransparentBounds 897 * @since 1.5 898 */ 899 public Matcher useTransparentBounds(boolean value) { 900 synchronized (this) { 901 transparentBounds = value; 902 useTransparentBoundsImpl(address, value); 903 } 904 return this; 905 } 906 907 /** 908 * Queries the anchoring of region bounds for this matcher. 909 * 910 * <p> This method returns <tt>true</tt> if this matcher uses 911 * <i>anchoring</i> bounds, <tt>false</tt> otherwise. 912 * 913 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a 914 * description of anchoring bounds. 915 * 916 * <p> By default, a matcher uses anchoring region boundaries. 917 * 918 * @return <tt>true</tt> iff this matcher is using anchoring bounds, 919 * <tt>false</tt> otherwise. 920 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 921 * @since 1.5 922 */ 923 public boolean hasAnchoringBounds() { 924 return anchoringBounds; 925 } 926 927 /** 928 * Sets the anchoring of region bounds for this matcher. 929 * 930 * <p> Invoking this method with an argument of <tt>true</tt> will set this 931 * matcher to use <i>anchoring</i> bounds. If the boolean 932 * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be 933 * used. 934 * 935 * <p> Using anchoring bounds, the boundaries of this 936 * matcher's region match anchors such as ^ and $. 937 * 938 * <p> Without anchoring bounds, the boundaries of this 939 * matcher's region will not match anchors such as ^ and $. 
940 * 941 * <p> By default, a matcher uses anchoring region boundaries. 942 * 943 * @param value a boolean indicating whether or not to use anchoring bounds. 944 * @return this matcher 945 * @see java.util.regex.Matcher#hasAnchoringBounds 946 * @since 1.5 947 */ 948 public Matcher useAnchoringBounds(boolean value) { 949 synchronized (this) { 950 anchoringBounds = value; 951 useAnchoringBoundsImpl(address, value); 952 } 953 return this; 954 } 955 956 /** 957 * <p>Returns the string representation of this matcher. The 958 * string representation of a <code>Matcher</code> contains information 959 * that may be useful for debugging. The exact format is unspecified. 960 * 961 * @return The string representation of this matcher 962 * @since 1.5 963 */ 964 public String toString() { 965 StringBuilder sb = new StringBuilder(); 966 sb.append("java.util.regex.Matcher"); 967 sb.append("[pattern=" + pattern()); 968 sb.append(" region="); 969 sb.append(regionStart() + "," + regionEnd()); 970 sb.append(" lastmatch="); 971 if (matchFound && (group() != null)) { 972 sb.append(group()); 973 } 974 sb.append("]"); 975 return sb.toString(); 976 } 977 978 /** 979 * <p>Returns true if the end of input was hit by the search engine in 980 * the last match operation performed by this matcher. 981 * 982 * <p>When this method returns true, then it is possible that more input 983 * would have changed the result of the last search. 984 * 985 * @return true iff the end of input was hit in the last match; false 986 * otherwise 987 * @since 1.5 988 */ 989 public boolean hitEnd() { 990 synchronized (this) { 991 return hitEndImpl(address); 992 } 993 } 994 995 996 /** 997 * <p>Returns true if more input could change a positive match into a 998 * negative one. 999 * 1000 * <p>If this method returns true, and a match was found, then more 1001 * input could cause the match to be lost. 
If this method returns false 1002 * and a match was found, then more input might change the match but the 1003 * match won't be lost. If a match was not found, then requireEnd has no 1004 * meaning. 1005 * 1006 * @return true iff more input could change a positive match into a 1007 * negative one. 1008 * @since 1.5 1009 */ 1010 public boolean requireEnd() { 1011 synchronized (this) { 1012 return requireEndImpl(address); 1013 } 1014 } 1015 1016 /** 1017 * Resets this matcher. 1018 * 1019 * <p> Resetting a matcher discards all of its explicit state information 1020 * and sets its append position to zero. The matcher's region is set to the 1021 * default region, which is its entire character sequence. The anchoring 1022 * and transparency of this matcher's region boundaries are unaffected. 1023 * 1024 * @return This matcher 1025 */ 1026 public Matcher reset() { 1027 return reset(input, 0, input.length()); 1028 } 1029 1030 /** 1031 * Resets this matcher with a new input sequence. 1032 * 1033 * <p> Resetting a matcher discards all of its explicit state information 1034 * and sets its append position to zero. The matcher's region is set to 1035 * the default region, which is its entire character sequence. The 1036 * anchoring and transparency of this matcher's region boundaries are 1037 * unaffected. 1038 * 1039 * @param input 1040 * The new input character sequence 1041 * 1042 * @return This matcher 1043 */ 1044 public Matcher reset(CharSequence input) { 1045 return reset(input, 0, input.length()); 1046 } 1047 1048 /** 1049 * Resets the Matcher. A new input sequence and a new region can be 1050 * specified. Results of a previous find get lost. The next attempt to find 1051 * an occurrence of the Pattern in the string will start at the beginning of 1052 * the region. This is the internal version of reset() to which the several 1053 * public versions delegate. 1054 * 1055 * @param input 1056 * the input sequence. 1057 * @param start 1058 * the start of the region. 
1059 * @param end 1060 * the end of the region. 1061 * 1062 * @return the matcher itself. 1063 */ 1064 private Matcher reset(CharSequence input, int start, int end) { 1065 if (input == null) { 1066 throw new IllegalArgumentException("input == null"); 1067 } 1068 1069 if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) { 1070 throw new IndexOutOfBoundsException(); 1071 } 1072 1073 this.input = input.toString(); 1074 this.regionStart = start; 1075 this.regionEnd = end; 1076 resetForInput(); 1077 1078 matchFound = false; 1079 appendPos = 0; 1080 1081 return this; 1082 } 1083 1084 private void resetForInput() { 1085 synchronized (this) { 1086 setInputImpl(address, input, regionStart, regionEnd); 1087 useAnchoringBoundsImpl(address, anchoringBounds); 1088 useTransparentBoundsImpl(address, transparentBounds); 1089 } 1090 } 1091 1092 /** 1093 * Makes sure that a successful match has been made. Is invoked internally 1094 * from various places in the class. 1095 * 1096 * @throws IllegalStateException 1097 * if no successful match has been made. 1098 */ 1099 private void ensureMatch() { 1100 if (!matchFound) { 1101 throw new IllegalStateException("No successful match so far"); 1102 } 1103 } 1104 1105 /** 1106 * Returns the start index of the previous match. </p> 1107 * 1108 * @return The index of the first character matched 1109 * 1110 * @throws IllegalStateException 1111 * If no match has yet been attempted, 1112 * or if the previous match operation failed 1113 */ 1114 public int start() { 1115 return start(0); 1116 } 1117 1118 /** 1119 * Returns the start index of the subsequence captured by the given group 1120 * during the previous match operation. 1121 * 1122 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 1123 * to right, starting at one. Group zero denotes the entire pattern, so 1124 * the expression <i>m.</i><tt>start(0)</tt> is equivalent to 1125 * <i>m.</i><tt>start()</tt>. 
</p> 1126 * 1127 * @param group 1128 * The index of a capturing group in this matcher's pattern 1129 * 1130 * @return The index of the first character captured by the group, 1131 * or <tt>-1</tt> if the match was successful but the group 1132 * itself did not match anything 1133 * 1134 * @throws IllegalStateException 1135 * If no match has yet been attempted, 1136 * or if the previous match operation failed 1137 * 1138 * @throws IndexOutOfBoundsException 1139 * If there is no capturing group in the pattern 1140 * with the given index 1141 */ 1142 public int start(int group) throws IllegalStateException { 1143 ensureMatch(); 1144 return matchOffsets[group * 2]; 1145 } 1146 1147 1148 /** 1149 * Returns the start index of the subsequence captured by the given 1150 * <a href="Pattern.html#groupname">named-capturing group</a> during the 1151 * previous match operation. 1152 * 1153 * @param name 1154 * The name of a named-capturing group in this matcher's pattern 1155 * 1156 * @return The index of the first character captured by the group, 1157 * or {@code -1} if the match was successful but the group 1158 * itself did not match anything 1159 * 1160 * @throws IllegalStateException 1161 * If no match has yet been attempted, 1162 * or if the previous match operation failed 1163 * 1164 * @throws IllegalArgumentException 1165 * If there is no capturing group in the pattern 1166 * with the given name 1167 * @since 1.8 1168 */ 1169 public int start(String name) { 1170 ensureMatch(); 1171 return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2]; 1172 } 1173 1174 private static int getMatchedGroupIndex(long patternAddr, String name) { 1175 int result = getMatchedGroupIndex0(patternAddr, name); 1176 if (result < 0) { 1177 throw new IllegalArgumentException("No capturing group in the pattern " + 1178 "with the name " + name); 1179 } 1180 return result; 1181 } 1182 1183 private static native int getMatchedGroupIndex0(long patternAddr, String name); 1184 private static 
native boolean findImpl(long addr, int startIndex, int[] offsets); 1185 private static native boolean findNextImpl(long addr, int[] offsets); 1186 private static native long getNativeFinalizer(); 1187 private static native int groupCountImpl(long addr); 1188 private static native boolean hitEndImpl(long addr); 1189 private static native boolean lookingAtImpl(long addr, int[] offsets); 1190 private static native boolean matchesImpl(long addr, int[] offsets); 1191 private static native int nativeSize(); 1192 private static native long openImpl(long patternAddr); 1193 private static native boolean requireEndImpl(long addr); 1194 private static native void setInputImpl(long addr, String s, int start, int end); 1195 private static native void useAnchoringBoundsImpl(long addr, boolean value); 1196 private static native void useTransparentBoundsImpl(long addr, boolean value); 1197 1198 /** 1199 * A trivial match result implementation that's based on an array of integers 1200 * representing match offsets. The array is of the form 1201 * {@code { start1, end1, start2, end2 ....}) where each consecutive pair of elements represents 1202 * the start and end of a match respectively. 
*/
    static final class OffsetBasedMatchResult implements MatchResult {
        /** The text the offsets refer to. */
        private final String input;
        /** Private copy of the offsets: start at index {@code 2 * group}, end at {@code 2 * group + 1}. */
        private final int[] offsets;

        /**
         * Creates a result over {@code input}; the offsets array is cloned, so
         * later changes to the caller's array are not reflected here.
         */
        OffsetBasedMatchResult(String input, int[] offsets) {
            this.input = input;
            this.offsets = offsets.clone();
        }

        /** Returns the start offset of group zero. */
        @Override
        public int start() {
            return start(0);
        }

        /** Returns the stored start offset of the given group. */
        @Override
        public int start(int group) {
            final int startSlot = 2 * group;
            return offsets[startSlot];
        }

        /** Returns the end offset of group zero. */
        @Override
        public int end() {
            return end(0);
        }

        /** Returns the stored end offset of the given group. */
        @Override
        public int end(int group) {
            final int endSlot = 2 * group + 1;
            return offsets[endSlot];
        }

        /** Returns the text of group zero. */
        @Override
        public String group() {
            return group(0);
        }

        /**
         * Returns the input text between the group's stored offsets, or
         * {@code null} when either offset is {@code -1}.
         */
        @Override
        public String group(int group) {
            final int from = start(group);
            final int to = end(group);
            final boolean unmatched = (from == -1) || (to == -1);
            return unmatched ? null : input.substring(from, to);
        }

        /** Returns the number of offset pairs, not counting the pair for group zero. */
        @Override
        public int groupCount() {
            final int pairCount = offsets.length / 2;
            return pairCount - 1;
        }
    }
}