Matcher.java revision 4f5f16ccda77bc0448b42e4dc36da50e0c100591
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.  Oracle designates this
9 * particular file as subject to the "Classpath" exception as provided
10 * by Oracle in the LICENSE file that accompanied this code.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 */
26
27package java.util.regex;
28
29import libcore.util.NativeAllocationRegistry;
30
31/**
32 * An engine that performs match operations on a {@link java.lang.CharSequence
33 * character sequence} by interpreting a {@link Pattern}.
34 *
35 * <p> A matcher is created from a pattern by invoking the pattern's {@link
36 * Pattern#matcher matcher} method.  Once created, a matcher can be used to
37 * perform three different kinds of match operations:
38 *
39 * <ul>
40 *
41 *   <li><p> The {@link #matches matches} method attempts to match the entire
42 *   input sequence against the pattern.  </p></li>
43 *
44 *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
45 *   input sequence, starting at the beginning, against the pattern.  </p></li>
46 *
47 *   <li><p> The {@link #find find} method scans the input sequence looking for
48 *   the next subsequence that matches the pattern.  </p></li>
49 *
50 * </ul>
51 *
52 * <p> Each of these methods returns a boolean indicating success or failure.
53 * More information about a successful match can be obtained by querying the
54 * state of the matcher.
55 *
56 * <p> A matcher finds matches in a subset of its input called the
57 * <i>region</i>. By default, the region contains all of the matcher's input.
58 * The region can be modified via the {@link #region region} method and queried
59 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
60 * methods. The way that the region boundaries interact with some pattern
61 * constructs can be changed. See {@link #useAnchoringBounds
62 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
63 * for more details.
64 *
65 * <p> This class also defines methods for replacing matched subsequences with
66 * new strings whose contents can, if desired, be computed from the match
67 * result.  The {@link #appendReplacement appendReplacement} and {@link
68 * #appendTail appendTail} methods can be used in tandem in order to collect
69 * the result into an existing string buffer, or the more convenient {@link
70 * #replaceAll replaceAll} method can be used to create a string in which every
71 * matching subsequence in the input sequence is replaced.
72 *
73 * <p> The explicit state of a matcher includes the start and end indices of
74 * the most recent successful match.  It also includes the start and end
75 * indices of the input subsequence captured by each <a
76 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
77 * count of such subsequences.  As a convenience, methods are also provided for
78 * returning these captured subsequences in string form.
79 *
80 * <p> The explicit state of a matcher is initially undefined; attempting to
81 * query any part of it before a successful match will cause an {@link
82 * IllegalStateException} to be thrown.  The explicit state of a matcher is
83 * recomputed by every match operation.
84 *
85 * <p> The implicit state of a matcher includes the input character sequence as
86 * well as the <i>append position</i>, which is initially zero and is updated
87 * by the {@link #appendReplacement appendReplacement} method.
88 *
89 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
90 * method or, if a new input sequence is desired, its {@link
91 * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
92 * matcher discards its explicit state information and sets the append position
93 * to zero.
94 *
95 * <p> Instances of this class are not safe for use by multiple concurrent
96 * threads. </p>
97 *
98 *
99 * @author      Mike McCloskey
100 * @author      Mark Reinhold
101 * @author      JSR-51 Expert Group
102 * @since       1.4
103 * @spec        JSR-51
104 */
105
106public final class Matcher implements MatchResult {
107    /**
108     * The Pattern object that created this Matcher.
109     */
110    private Pattern pattern;
111
112    /**
113     * The address of the native peer.
114     * Uses of this must be manually synchronized to avoid native crashes.
115     */
116    private long address;
117
118    /**
119     * If non-null, a Runnable that can be used to explicitly deallocate address.
120     */
121    private Runnable nativeFinalizer;
122
123    private static final NativeAllocationRegistry registry = new NativeAllocationRegistry(
124            Matcher.class.getClassLoader(), getNativeFinalizer(), nativeSize());
125
126    /**
127     * Holds the original CharSequence for use in {@link #reset}. {@link #input} is used during
128     * matching. Note that CharSequence is mutable while String is not, so reset can cause the input
129     * to match to change.
130     */
131    private CharSequence originalInput;
132
133    /**
134     * Holds the input text.
135     */
136    private String input;
137
138    /**
139     * Holds the start of the region, or 0 if the matching should start at the
140     * beginning of the text.
141     */
142    private int regionStart;
143
144    /**
145     * Holds the end of the region, or input.length() if the matching should
146     * go until the end of the input.
147     */
148    private int regionEnd;
149
150    /**
151     * Holds the position where the next append operation will take place.
152     */
153    private int appendPos;
154
155    /**
156     * Reflects whether a match has been found during the most recent find
157     * operation.
158     */
159    private boolean matchFound;
160
161    /**
162     * Holds the offsets for the most recent match.
163     */
164    private int[] matchOffsets;
165
166    /**
167     * Reflects whether the bounds of the region are anchoring.
168     */
169    private boolean anchoringBounds = true;
170
171    /**
172     * Reflects whether the bounds of the region are transparent.
173     */
174    private boolean transparentBounds;
175
176    /**
177     * All matchers have the state used by Pattern during a match.
178     */
Matcher(Pattern parent, CharSequence text) {
    // usePattern() opens the native peer for the pattern and returns this
    // matcher, so the text can be handed to reset(CharSequence) on the same
    // instance immediately afterwards.
    usePattern(parent).reset(text);
}
183
184    /**
185     * Returns the pattern that is interpreted by this matcher.
186     *
187     * @return  The pattern for which this matcher was created
188     */
189    public Pattern pattern() {
190        return pattern;
191    }
192
193    /**
194     * Returns the match state of this matcher as a {@link MatchResult}.
195     * The result is unaffected by subsequent operations performed upon this
196     * matcher.
197     *
198     * @return  a <code>MatchResult</code> with the state of this matcher
199     * @since 1.5
200     */
201    public MatchResult toMatchResult() {
202        ensureMatch();
203        return new OffsetBasedMatchResult(input, matchOffsets);
204    }
205
206    /**
207      * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
208      * find matches with.
209      *
210      * <p> This method causes this matcher to lose information
211      * about the groups of the last match that occurred. The
212      * matcher's position in the input is maintained and its
213      * last append position is unaffected.</p>
214      *
215      * @param  newPattern
216      *         The new pattern used by this matcher
217      * @return  This matcher
218      * @throws  IllegalArgumentException
219      *          If newPattern is <tt>null</tt>
220      * @since 1.5
221      */
222    public Matcher usePattern(Pattern newPattern) {
223        if (newPattern == null) {
224            throw new IllegalArgumentException("newPattern == null");
225        }
226
227        this.pattern = newPattern;
228
229        synchronized (this) {
230            if (nativeFinalizer != null) {
231                nativeFinalizer.run();
232                address = 0; // In case openImpl throws.
233                nativeFinalizer = null;
234            }
235            address = openImpl(pattern.address);
236            nativeFinalizer = registry.registerNativeAllocation(this, address);
237        }
238
239        if (input != null) {
240            resetForInput();
241        }
242
243        matchOffsets = new int[(groupCount() + 1) * 2];
244        matchFound = false;
245        return this;
246    }
247
248    /**
249     * Returns the offset after the last character matched.
250     *
251     * @return  The offset after the last character matched
252     *
253     * @throws  IllegalStateException
254     *          If no match has yet been attempted,
255     *          or if the previous match operation failed
256     */
257    public int end() {
258        return end(0);
259    }
260
261    /**
262     * Returns the offset after the last character of the subsequence
263     * captured by the given group during the previous match operation.
264     *
265     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
266     * to right, starting at one.  Group zero denotes the entire pattern, so
267     * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
268     * <i>m.</i><tt>end()</tt>.  </p>
269     *
270     * @param  group
271     *         The index of a capturing group in this matcher's pattern
272     *
273     * @return  The offset after the last character captured by the group,
274     *          or <tt>-1</tt> if the match was successful
275     *          but the group itself did not match anything
276     *
277     * @throws  IllegalStateException
278     *          If no match has yet been attempted,
279     *          or if the previous match operation failed
280     *
281     * @throws  IndexOutOfBoundsException
282     *          If there is no capturing group in the pattern
283     *          with the given index
284     */
285    public int end(int group) {
286        ensureMatch();
287        return matchOffsets[(group * 2) + 1];
288    }
289
290    /**
291     * Returns the offset after the last character of the subsequence
292     * captured by the given <a href="Pattern.html#groupname">named-capturing
293     * group</a> during the previous match operation.
294     *
295     * @param  name
296     *         The name of a named-capturing group in this matcher's pattern
297     *
298     * @return  The offset after the last character captured by the group,
299     *          or {@code -1} if the match was successful
300     *          but the group itself did not match anything
301     *
302     * @throws  IllegalStateException
303     *          If no match has yet been attempted,
304     *          or if the previous match operation failed
305     *
306     * @throws  IllegalArgumentException
307     *          If there is no capturing group in the pattern
308     *          with the given name
309     * @since 1.8
310     */
311    public int end(String name) {
312        ensureMatch();
313        return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2 + 1];
314    }
315
316
317    /**
318     * Returns the input subsequence matched by the previous match.
319     *
320     * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
321     * the expressions <i>m.</i><tt>group()</tt> and
322     * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
323     * are equivalent.  </p>
324     *
325     * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
326     * string.  This method will return the empty string when the pattern
327     * successfully matches the empty string in the input.  </p>
328     *
329     * @return The (possibly empty) subsequence matched by the previous match,
330     *         in string form
331     *
332     * @throws  IllegalStateException
333     *          If no match has yet been attempted,
334     *          or if the previous match operation failed
335     */
336    public String group() {
337        return group(0);
338    }
339
340    /**
341     * Returns the input subsequence captured by the given group during the
342     * previous match operation.
343     *
344     * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
345     * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
346     * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
347     * are equivalent.  </p>
348     *
349     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
350     * to right, starting at one.  Group zero denotes the entire pattern, so
351     * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
352     * </p>
353     *
354     * <p> If the match was successful but the group specified failed to match
355     * any part of the input sequence, then <tt>null</tt> is returned. Note
356     * that some groups, for example <tt>(a*)</tt>, match the empty string.
357     * This method will return the empty string when such a group successfully
358     * matches the empty string in the input.  </p>
359     *
360     * @param  group
361     *         The index of a capturing group in this matcher's pattern
362     *
363     * @return  The (possibly empty) subsequence captured by the group
364     *          during the previous match, or <tt>null</tt> if the group
365     *          failed to match part of the input
366     *
367     * @throws  IllegalStateException
368     *          If no match has yet been attempted,
369     *          or if the previous match operation failed
370     *
371     * @throws  IndexOutOfBoundsException
372     *          If there is no capturing group in the pattern
373     *          with the given index
374     */
375    public String group(int group) {
376        ensureMatch();
377        int from = matchOffsets[group * 2];
378        int to = matchOffsets[(group * 2) + 1];
379        if (from == -1 || to == -1) {
380            return null;
381        } else {
382            return input.substring(from, to);
383        }
384    }
385
386    /**
387     * Returns the input subsequence captured by the given
388     * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
389     * match operation.
390     *
391     * <p> If the match was successful but the group specified failed to match
392     * any part of the input sequence, then <tt>null</tt> is returned. Note
393     * that some groups, for example <tt>(a*)</tt>, match the empty string.
394     * This method will return the empty string when such a group successfully
395     * matches the empty string in the input.  </p>
396     *
397     * @param  name
398     *         The name of a named-capturing group in this matcher's pattern
399     *
400     * @return  The (possibly empty) subsequence captured by the named group
401     *          during the previous match, or <tt>null</tt> if the group
402     *          failed to match part of the input
403     *
404     * @throws  IllegalStateException
405     *          If no match has yet been attempted,
406     *          or if the previous match operation failed
407     *
408     * @throws  IllegalArgumentException
409     *          If there is no capturing group in the pattern
410     *          with the given name
411     * @since 1.7
412     */
413    public String group(String name) {
414        ensureMatch();
415        int group = getMatchedGroupIndex(pattern.address, name);
416        int from = matchOffsets[group * 2];
417        int to = matchOffsets[(group * 2) + 1];
418        if (from == -1 || to == -1) {
419            return null;
420        } else {
421            return input.substring(from, to);
422        }
423    }
424
425    /**
426     * Returns the number of capturing groups in this matcher's pattern.
427     *
428     * <p> Group zero denotes the entire pattern by convention. It is not
429     * included in this count.
430     *
431     * <p> Any non-negative integer smaller than or equal to the value
432     * returned by this method is guaranteed to be a valid group index for
433     * this matcher.  </p>
434     *
435     * @return The number of capturing groups in this matcher's pattern
436     */
437    public int groupCount() {
438        synchronized (this) {
439            return groupCountImpl(address);
440        }
441    }
442
443    /**
444     * Attempts to match the entire region against the pattern.
445     *
446     * <p> If the match succeeds then more information can be obtained via the
447     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
448     *
449     * @return  <tt>true</tt> if, and only if, the entire region sequence
450     *          matches this matcher's pattern
451     */
452    public boolean matches() {
453        synchronized (this) {
454            matchFound = matchesImpl(address, matchOffsets);
455        }
456        return matchFound;
457    }
458
459    /**
460     * Attempts to find the next subsequence of the input sequence that matches
461     * the pattern.
462     *
463     * <p> This method starts at the beginning of this matcher's region, or, if
464     * a previous invocation of the method was successful and the matcher has
465     * not since been reset, at the first character not matched by the previous
466     * match.
467     *
468     * <p> If the match succeeds then more information can be obtained via the
469     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
470     *
471     * @return  <tt>true</tt> if, and only if, a subsequence of the input
472     *          sequence matches this matcher's pattern
473     */
474    public boolean find() {
475        synchronized (this) {
476            matchFound = findNextImpl(address, matchOffsets);
477        }
478        return matchFound;
479    }
480
481    /**
482     * Resets this matcher and then attempts to find the next subsequence of
483     * the input sequence that matches the pattern, starting at the specified
484     * index.
485     *
486     * <p> If the match succeeds then more information can be obtained via the
487     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
488     * invocations of the {@link #find()} method will start at the first
489     * character not matched by this match.  </p>
490     *
491     * @throws  IndexOutOfBoundsException
492     *          If start is less than zero or if start is greater than the
493     *          length of the input sequence.
494     *
495     * @return  <tt>true</tt> if, and only if, a subsequence of the input
496     *          sequence starting at the given index matches this matcher's
497     *          pattern
498     */
499    public boolean find(int start) {
500        reset();
501        if (start < 0 || start > input.length()) {
502            throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length());
503        }
504
505        synchronized (this) {
506            matchFound = findImpl(address, start, matchOffsets);
507        }
508        return matchFound;
509    }
510
511    /**
512     * Attempts to match the input sequence, starting at the beginning of the
513     * region, against the pattern.
514     *
515     * <p> Like the {@link #matches matches} method, this method always starts
516     * at the beginning of the region; unlike that method, it does not
517     * require that the entire region be matched.
518     *
519     * <p> If the match succeeds then more information can be obtained via the
520     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
521     *
522     * @return  <tt>true</tt> if, and only if, a prefix of the input
523     *          sequence matches this matcher's pattern
524     */
525    public boolean lookingAt() {
526        synchronized (this) {
527            matchFound = lookingAtImpl(address, matchOffsets);
528        }
529        return matchFound;
530    }
531
532    /**
533     * Returns a literal replacement <code>String</code> for the specified
534     * <code>String</code>.
535     *
536     * This method produces a <code>String</code> that will work
537     * as a literal replacement <code>s</code> in the
538     * <code>appendReplacement</code> method of the {@link Matcher} class.
539     * The <code>String</code> produced will match the sequence of characters
540     * in <code>s</code> treated as a literal sequence. Slashes ('\') and
541     * dollar signs ('$') will be given no special meaning.
542     *
543     * @param  s The string to be literalized
544     * @return  A literal string replacement
545     * @since 1.5
546     */
547    public static String quoteReplacement(String s) {
548        if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
549            return s;
550        StringBuilder sb = new StringBuilder();
551        for (int i=0; i<s.length(); i++) {
552            char c = s.charAt(i);
553            if (c == '\\' || c == '$') {
554                sb.append('\\');
555            }
556            sb.append(c);
557        }
558        return sb.toString();
559    }
560
561    /**
562     * Implements a non-terminal append-and-replace step.
563     *
564     * <p> This method performs the following actions: </p>
565     *
566     * <ol>
567     *
568     *   <li><p> It reads characters from the input sequence, starting at the
569     *   append position, and appends them to the given string buffer.  It
570     *   stops after reading the last character preceding the previous match,
571     *   that is, the character at index {@link
572     *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
573     *
574     *   <li><p> It appends the given replacement string to the string buffer.
575     *   </p></li>
576     *
577     *   <li><p> It sets the append position of this matcher to the index of
578     *   the last character matched, plus one, that is, to {@link #end()}.
579     *   </p></li>
580     *
581     * </ol>
582     *
583     * <p> The replacement string may contain references to subsequences
584     * captured during the previous match: Each occurrence of
585     * <tt>$</tt><i>g</i> will be replaced by the result of evaluating
586     * {@link #group(int) group(g)}. For <tt>$</tt><i>g</i>,
587     * the first number after the <tt>$</tt> is always treated as part of
588     * the group reference. Subsequent numbers are incorporated into g if
589     * they would form a legal group reference. Only the numerals '0'
590     * through '9' are considered as potential components of the group
591     * reference. If the second group matched the string <tt>"foo"</tt>, for
592     * example, then passing the replacement string <tt>"$2bar"</tt> would
593     * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
594     * sign (<tt>$</tt>) may be included as a literal in the replacement
595     * string by preceding it with a backslash (<tt>\$</tt>).
596     *
597     * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
598     * the replacement string may cause the results to be different than if it
599     * were being treated as a literal replacement string. Dollar signs may be
600     * treated as references to captured subsequences as described above, and
601     * backslashes are used to escape literal characters in the replacement
602     * string.
603     *
604     * <p> This method is intended to be used in a loop together with the
605     * {@link #appendTail appendTail} and {@link #find find} methods.  The
606     * following code, for example, writes <tt>one dog two dogs in the
607     * yard</tt> to the standard-output stream: </p>
608     *
609     * <blockquote><pre>
610     * Pattern p = Pattern.compile("cat");
611     * Matcher m = p.matcher("one cat two cats in the yard");
612     * StringBuffer sb = new StringBuffer();
613     * while (m.find()) {
614     *     m.appendReplacement(sb, "dog");
615     * }
616     * m.appendTail(sb);
617     * System.out.println(sb.toString());</pre></blockquote>
618     *
619     * @param  sb
620     *         The target string buffer
621     *
622     * @param  replacement
623     *         The replacement string
624     *
625     * @return  This matcher
626     *
627     * @throws  IllegalStateException
628     *          If no match has yet been attempted,
629     *          or if the previous match operation failed
630     *
631     * @throws  IllegalArgumentException
632     *          If the replacement string refers to a named-capturing
633     *          group that does not exist in the pattern
634     *
635     * @throws  IndexOutOfBoundsException
636     *          If the replacement string refers to a capturing group
637     *          that does not exist in the pattern
638     */
639    public Matcher appendReplacement(StringBuffer sb, String replacement) {
640        sb.append(input.substring(appendPos, start()));
641        appendEvaluated(sb, replacement);
642        appendPos = end();
643
644        return this;
645    }
646
647    /**
648     * Internal helper method to append a given string to a given string buffer.
649     * If the string contains any references to groups, these are replaced by
650     * the corresponding group's contents.
651     *
652     * @param buffer the string buffer.
653     * @param s the string to append.
654     */
655    private void appendEvaluated(StringBuffer buffer, String s) {
656        boolean escape = false;
657        boolean dollar = false;
658        boolean escapeNamedGroup = false;
659        int escapeNamedGroupStart = -1;
660
661        for (int i = 0; i < s.length(); i++) {
662            char c = s.charAt(i);
663            if (c == '\\' && !escape) {
664                escape = true;
665            } else if (c == '$' && !escape) {
666                dollar = true;
667            } else if (c >= '0' && c <= '9' && dollar) {
668                buffer.append(group(c - '0'));
669                dollar = false;
670            } else if (c == '{' && dollar) {
671                escapeNamedGroup = true;
672                escapeNamedGroupStart = i;
673            } else if (c == '}' && dollar && escapeNamedGroup) {
674                String namedGroupName =
675                    s.substring(escapeNamedGroupStart + 1, i);
676                buffer.append(group(namedGroupName));
677                dollar = false;
678                escapeNamedGroup = false;
679            } else if (c != '}' && dollar && escapeNamedGroup) {
680                continue;
681            } else {
682                buffer.append(c);
683                dollar = false;
684                escape = false;
685                escapeNamedGroup = false;
686            }
687        }
688
689        if (escapeNamedGroup) {
690            throw new IllegalArgumentException("Missing ending brace '}' from replacement string");
691        }
692
693        if (escape) {
694            throw new ArrayIndexOutOfBoundsException(s.length());
695        }
696    }
697
698
699    /**
700     * Implements a terminal append-and-replace step.
701     *
702     * <p> This method reads characters from the input sequence, starting at
703     * the append position, and appends them to the given string buffer.  It is
704     * intended to be invoked after one or more invocations of the {@link
705     * #appendReplacement appendReplacement} method in order to copy the
706     * remainder of the input sequence.  </p>
707     *
708     * @param  sb
709     *         The target string buffer
710     *
711     * @return  The target string buffer
712     */
713    public StringBuffer appendTail(StringBuffer sb) {
714        if (appendPos < regionEnd) {
715            sb.append(input.substring(appendPos, regionEnd));
716        }
717        return sb;
718    }
719
720    /**
721     * Replaces every subsequence of the input sequence that matches the
722     * pattern with the given replacement string.
723     *
724     * <p> This method first resets this matcher.  It then scans the input
725     * sequence looking for matches of the pattern.  Characters that are not
726     * part of any match are appended directly to the result string; each match
727     * is replaced in the result by the replacement string.  The replacement
728     * string may contain references to captured subsequences as in the {@link
729     * #appendReplacement appendReplacement} method.
730     *
731     * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
732     * the replacement string may cause the results to be different than if it
733     * were being treated as a literal replacement string. Dollar signs may be
734     * treated as references to captured subsequences as described above, and
735     * backslashes are used to escape literal characters in the replacement
736     * string.
737     *
738     * <p> Given the regular expression <tt>a*b</tt>, the input
739     * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
740     * <tt>"-"</tt>, an invocation of this method on a matcher for that
741     * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
742     *
743     * <p> Invoking this method changes this matcher's state.  If the matcher
744     * is to be used in further matching operations then it should first be
745     * reset.  </p>
746     *
747     * @param  replacement
748     *         The replacement string
749     *
750     * @return  The string constructed by replacing each matching subsequence
751     *          by the replacement string, substituting captured subsequences
752     *          as needed
753     */
754    public String replaceAll(String replacement) {
755        reset();
756        StringBuffer buffer = new StringBuffer(input.length());
757        while (find()) {
758            appendReplacement(buffer, replacement);
759        }
760        return appendTail(buffer).toString();
761    }
762
763    /**
764     * Replaces the first subsequence of the input sequence that matches the
765     * pattern with the given replacement string.
766     *
767     * <p> This method first resets this matcher.  It then scans the input
768     * sequence looking for a match of the pattern.  Characters that are not
769     * part of the match are appended directly to the result string; the match
770     * is replaced in the result by the replacement string.  The replacement
771     * string may contain references to captured subsequences as in the {@link
772     * #appendReplacement appendReplacement} method.
773     *
774     * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
775     * the replacement string may cause the results to be different than if it
776     * were being treated as a literal replacement string. Dollar signs may be
777     * treated as references to captured subsequences as described above, and
778     * backslashes are used to escape literal characters in the replacement
779     * string.
780     *
781     * <p> Given the regular expression <tt>dog</tt>, the input
782     * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
783     * <tt>"cat"</tt>, an invocation of this method on a matcher for that
784     * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
785     *
786     * <p> Invoking this method changes this matcher's state.  If the matcher
787     * is to be used in further matching operations then it should first be
788     * reset.  </p>
789     *
790     * @param  replacement
791     *         The replacement string
792     * @return  The string constructed by replacing the first matching
793     *          subsequence by the replacement string, substituting captured
794     *          subsequences as needed
795     */
796    public String replaceFirst(String replacement) {
797        reset();
798        StringBuffer buffer = new StringBuffer(input.length());
799        if (find()) {
800            appendReplacement(buffer, replacement);
801        }
802        return appendTail(buffer).toString();
803    }
804
805    /**
806     * Sets the limits of this matcher's region. The region is the part of the
807     * input sequence that will be searched to find a match. Invoking this
808     * method resets the matcher, and then sets the region to start at the
809     * index specified by the <code>start</code> parameter and end at the
810     * index specified by the <code>end</code> parameter.
811     *
812     * <p>Depending on the transparency and anchoring being used (see
813     * {@link #useTransparentBounds useTransparentBounds} and
814     * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
815     * as anchors may behave differently at or around the boundaries of the
816     * region.
817     *
818     * @param  start
819     *         The index to start searching at (inclusive)
820     * @param  end
821     *         The index to end searching at (exclusive)
822     * @throws  IndexOutOfBoundsException
823     *          If start or end is less than zero, if
824     *          start is greater than the length of the input sequence, if
825     *          end is greater than the length of the input sequence, or if
826     *          start is greater than end.
827     * @return  this matcher
828     * @since 1.5
829     */
830    public Matcher region(int start, int end) {
831        return reset(originalInput, start, end);
832    }
833
834    /**
835     * Reports the start index of this matcher's region. The
836     * searches this matcher conducts are limited to finding matches
837     * within {@link #regionStart regionStart} (inclusive) and
838     * {@link #regionEnd regionEnd} (exclusive).
839     *
840     * @return  The starting point of this matcher's region
841     * @since 1.5
842     */
843    public int regionStart() {
844        return regionStart;
845    }
846
847    /**
848     * Reports the end index (exclusive) of this matcher's region.
849     * The searches this matcher conducts are limited to finding matches
850     * within {@link #regionStart regionStart} (inclusive) and
851     * {@link #regionEnd regionEnd} (exclusive).
852     *
853     * @return  the ending point of this matcher's region
854     * @since 1.5
855     */
856    public int regionEnd() {
857        return regionEnd;
858    }
859
860    /**
861     * Queries the transparency of region bounds for this matcher.
862     *
863     * <p> This method returns <tt>true</tt> if this matcher uses
864     * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
865     * bounds.
866     *
867     * <p> See {@link #useTransparentBounds useTransparentBounds} for a
868     * description of transparent and opaque bounds.
869     *
870     * <p> By default, a matcher uses opaque region boundaries.
871     *
872     * @return <tt>true</tt> iff this matcher is using transparent bounds,
873     *         <tt>false</tt> otherwise.
874     * @see java.util.regex.Matcher#useTransparentBounds(boolean)
875     * @since 1.5
876     */
877    public boolean hasTransparentBounds() {
878        return transparentBounds;
879    }
880
881    /**
882     * Sets the transparency of region bounds for this matcher.
883     *
884     * <p> Invoking this method with an argument of <tt>true</tt> will set this
885     * matcher to use <i>transparent</i> bounds. If the boolean
886     * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
887     *
888     * <p> Using transparent bounds, the boundaries of this
889     * matcher's region are transparent to lookahead, lookbehind,
890     * and boundary matching constructs. Those constructs can see beyond the
891     * boundaries of the region to see if a match is appropriate.
892     *
893     * <p> Using opaque bounds, the boundaries of this matcher's
894     * region are opaque to lookahead, lookbehind, and boundary matching
895     * constructs that may try to see beyond them. Those constructs cannot
896     * look past the boundaries so they will fail to match anything outside
897     * of the region.
898     *
899     * <p> By default, a matcher uses opaque bounds.
900     *
901     * @param  value a boolean indicating whether to use opaque or transparent
902     *         regions
903     * @return this matcher
904     * @see java.util.regex.Matcher#hasTransparentBounds
905     * @since 1.5
906     */
907    public Matcher useTransparentBounds(boolean value) {
908        synchronized (this) {
909            transparentBounds = value;
910            useTransparentBoundsImpl(address, value);
911        }
912        return this;
913    }
914
915    /**
916     * Queries the anchoring of region bounds for this matcher.
917     *
918     * <p> This method returns <tt>true</tt> if this matcher uses
919     * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
920     *
921     * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
922     * description of anchoring bounds.
923     *
924     * <p> By default, a matcher uses anchoring region boundaries.
925     *
926     * @return <tt>true</tt> iff this matcher is using anchoring bounds,
927     *         <tt>false</tt> otherwise.
928     * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
929     * @since 1.5
930     */
931    public boolean hasAnchoringBounds() {
932        return anchoringBounds;
933    }
934
935    /**
936     * Sets the anchoring of region bounds for this matcher.
937     *
938     * <p> Invoking this method with an argument of <tt>true</tt> will set this
939     * matcher to use <i>anchoring</i> bounds. If the boolean
940     * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
941     * used.
942     *
943     * <p> Using anchoring bounds, the boundaries of this
944     * matcher's region match anchors such as ^ and $.
945     *
946     * <p> Without anchoring bounds, the boundaries of this
947     * matcher's region will not match anchors such as ^ and $.
948     *
949     * <p> By default, a matcher uses anchoring region boundaries.
950     *
951     * @param  value a boolean indicating whether or not to use anchoring bounds.
952     * @return this matcher
953     * @see java.util.regex.Matcher#hasAnchoringBounds
954     * @since 1.5
955     */
956    public Matcher useAnchoringBounds(boolean value) {
957        synchronized (this) {
958            anchoringBounds = value;
959            useAnchoringBoundsImpl(address, value);
960        }
961        return this;
962    }
963
964    /**
965     * <p>Returns the string representation of this matcher. The
966     * string representation of a <code>Matcher</code> contains information
967     * that may be useful for debugging. The exact format is unspecified.
968     *
969     * @return  The string representation of this matcher
970     * @since 1.5
971     */
972    public String toString() {
973        StringBuilder sb = new StringBuilder();
974        sb.append("java.util.regex.Matcher");
975        sb.append("[pattern=" + pattern());
976        sb.append(" region=");
977        sb.append(regionStart() + "," + regionEnd());
978        sb.append(" lastmatch=");
979        if (matchFound && (group() != null)) {
980            sb.append(group());
981        }
982        sb.append("]");
983        return sb.toString();
984    }
985
986    /**
987     * <p>Returns true if the end of input was hit by the search engine in
988     * the last match operation performed by this matcher.
989     *
990     * <p>When this method returns true, then it is possible that more input
991     * would have changed the result of the last search.
992     *
993     * @return  true iff the end of input was hit in the last match; false
994     *          otherwise
995     * @since 1.5
996     */
997    public boolean hitEnd() {
998        synchronized (this) {
999            return hitEndImpl(address);
1000        }
1001    }
1002
1003
1004    /**
1005     * <p>Returns true if more input could change a positive match into a
1006     * negative one.
1007     *
1008     * <p>If this method returns true, and a match was found, then more
1009     * input could cause the match to be lost. If this method returns false
1010     * and a match was found, then more input might change the match but the
1011     * match won't be lost. If a match was not found, then requireEnd has no
1012     * meaning.
1013     *
1014     * @return  true iff more input could change a positive match into a
1015     *          negative one.
1016     * @since 1.5
1017     */
1018    public boolean requireEnd() {
1019        synchronized (this) {
1020            return requireEndImpl(address);
1021        }
1022    }
1023
1024    /**
1025     * Resets this matcher.
1026     *
1027     * <p> Resetting a matcher discards all of its explicit state information
1028     * and sets its append position to zero. The matcher's region is set to the
1029     * default region, which is its entire character sequence. The anchoring
1030     * and transparency of this matcher's region boundaries are unaffected.
1031     *
1032     * @return  This matcher
1033     */
1034    public Matcher reset() {
1035        return reset(originalInput, 0, originalInput.length());
1036    }
1037
1038    /**
1039     * Resets this matcher with a new input sequence.
1040     *
1041     * <p> Resetting a matcher discards all of its explicit state information
1042     * and sets its append position to zero.  The matcher's region is set to
1043     * the default region, which is its entire character sequence.  The
1044     * anchoring and transparency of this matcher's region boundaries are
1045     * unaffected.
1046     *
1047     * @param  input
1048     *         The new input character sequence
1049     *
1050     * @return  This matcher
1051     */
1052    public Matcher reset(CharSequence input) {
1053        return reset(input, 0, input.length());
1054    }
1055
1056    /**
1057     * Resets the Matcher. A new input sequence and a new region can be
1058     * specified. Results of a previous find get lost. The next attempt to find
1059     * an occurrence of the Pattern in the string will start at the beginning of
1060     * the region. This is the internal version of reset() to which the several
1061     * public versions delegate.
1062     *
1063     * @param input
1064     *            the input sequence.
1065     * @param start
1066     *            the start of the region.
1067     * @param end
1068     *            the end of the region.
1069     *
1070     * @return the matcher itself.
1071     */
1072    private Matcher reset(CharSequence input, int start, int end) {
1073        if (input == null) {
1074            throw new IllegalArgumentException("input == null");
1075        }
1076
1077        if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
1078            throw new IndexOutOfBoundsException();
1079        }
1080
1081        this.originalInput = input;
1082        this.input = input.toString();
1083        this.regionStart = start;
1084        this.regionEnd = end;
1085        resetForInput();
1086
1087        matchFound = false;
1088        appendPos = 0;
1089
1090        return this;
1091    }
1092
1093    private void resetForInput() {
1094        synchronized (this) {
1095            setInputImpl(address, input, regionStart, regionEnd);
1096            useAnchoringBoundsImpl(address, anchoringBounds);
1097            useTransparentBoundsImpl(address, transparentBounds);
1098        }
1099    }
1100
1101    /**
1102     * Makes sure that a successful match has been made. Is invoked internally
1103     * from various places in the class.
1104     *
1105     * @throws IllegalStateException
1106     *             if no successful match has been made.
1107     */
1108    private void ensureMatch() {
1109        if (!matchFound) {
1110            throw new IllegalStateException("No successful match so far");
1111        }
1112    }
1113
1114    /**
     * Returns the start index of the previous match.
1116     *
1117     * @return  The index of the first character matched
1118     *
1119     * @throws  IllegalStateException
1120     *          If no match has yet been attempted,
1121     *          or if the previous match operation failed
1122     */
1123    public int start() {
1124        return start(0);
1125    }
1126
1127    /**
1128     * Returns the start index of the subsequence captured by the given group
1129     * during the previous match operation.
1130     *
1131     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
1132     * to right, starting at one.  Group zero denotes the entire pattern, so
1133     * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
1134     * <i>m.</i><tt>start()</tt>.  </p>
1135     *
1136     * @param  group
1137     *         The index of a capturing group in this matcher's pattern
1138     *
1139     * @return  The index of the first character captured by the group,
1140     *          or <tt>-1</tt> if the match was successful but the group
1141     *          itself did not match anything
1142     *
1143     * @throws  IllegalStateException
1144     *          If no match has yet been attempted,
1145     *          or if the previous match operation failed
1146     *
1147     * @throws  IndexOutOfBoundsException
1148     *          If there is no capturing group in the pattern
1149     *          with the given index
1150     */
1151    public int start(int group) throws IllegalStateException {
1152        ensureMatch();
1153        return matchOffsets[group * 2];
1154    }
1155
1156
1157    /**
1158     * Returns the start index of the subsequence captured by the given
1159     * <a href="Pattern.html#groupname">named-capturing group</a> during the
1160     * previous match operation.
1161     *
1162     * @param  name
1163     *         The name of a named-capturing group in this matcher's pattern
1164     *
1165     * @return  The index of the first character captured by the group,
1166     *          or {@code -1} if the match was successful but the group
1167     *          itself did not match anything
1168     *
1169     * @throws  IllegalStateException
1170     *          If no match has yet been attempted,
1171     *          or if the previous match operation failed
1172     *
1173     * @throws  IllegalArgumentException
1174     *          If there is no capturing group in the pattern
1175     *          with the given name
1176     * @since 1.8
1177     */
1178    public int start(String name) {
1179        ensureMatch();
1180        return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2];
1181    }
1182
1183    private static int getMatchedGroupIndex(long patternAddr, String name) {
1184        int result = getMatchedGroupIndex0(patternAddr, name);
1185        if (result < 0) {
1186            throw new IllegalArgumentException("No capturing group in the pattern " +
1187                                               "with the name " + name);
1188        }
1189        return result;
1190    }
1191
1192    private static native int getMatchedGroupIndex0(long patternAddr, String name);
1193    private static native boolean findImpl(long addr, int startIndex, int[] offsets);
1194    private static native boolean findNextImpl(long addr, int[] offsets);
1195    private static native long getNativeFinalizer();
1196    private static native int groupCountImpl(long addr);
1197    private static native boolean hitEndImpl(long addr);
1198    private static native boolean lookingAtImpl(long addr, int[] offsets);
1199    private static native boolean matchesImpl(long addr, int[] offsets);
1200    private static native int nativeSize();
1201    private static native long openImpl(long patternAddr);
1202    private static native boolean requireEndImpl(long addr);
1203    private static native void setInputImpl(long addr, String s, int start, int end);
1204    private static native void useAnchoringBoundsImpl(long addr, boolean value);
1205    private static native void useTransparentBoundsImpl(long addr, boolean value);
1206
1207    /**
1208     * A trivial match result implementation that's based on an array of integers
1209     * representing match offsets. The array is of the form
     * {@code { start1, end1, start2, end2, ... }} where each consecutive pair of elements represents
1211     * the start and end of a match respectively.
1212     */
1213    static final class OffsetBasedMatchResult implements MatchResult {
1214        private final String input;
1215        private final int[] offsets;
1216
1217        OffsetBasedMatchResult(String input, int[] offsets) {
1218            this.input = input;
1219            this.offsets = offsets.clone();
1220        }
1221
1222        @Override
1223        public int start() {
1224            return start(0);
1225        }
1226
1227        @Override
1228        public int start(int group) {
1229            return offsets[2 * group];
1230        }
1231
1232        @Override
1233        public int end() {
1234            return end(0);
1235        }
1236
1237        @Override
1238        public int end(int group) {
1239            return offsets[2 * group + 1];
1240        }
1241
1242        @Override
1243        public String group() {
1244            return group(0);
1245        }
1246
1247        @Override
1248        public String group(int group) {
1249            final int start = start(group);
1250            final int end = end(group);
1251            if (start == -1 || end == -1) {
1252                return null;
1253            }
1254
1255            return input.substring(start, end);
1256        }
1257
1258        @Override
1259        public int groupCount() {
1260            return (offsets.length / 2) - 1;
1261        }
1262    }
1263}
1264