1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.  Oracle designates this
9 * particular file as subject to the "Classpath" exception as provided
10 * by Oracle in the LICENSE file that accompanied this code.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 */
26
27package java.util.regex;
28
29import dalvik.annotation.optimization.ReachabilitySensitive;
30import libcore.util.NativeAllocationRegistry;
31
32/**
33 * An engine that performs match operations on a {@link java.lang.CharSequence
34 * character sequence} by interpreting a {@link Pattern}.
35 *
36 * <p> A matcher is created from a pattern by invoking the pattern's {@link
37 * Pattern#matcher matcher} method.  Once created, a matcher can be used to
38 * perform three different kinds of match operations:
39 *
40 * <ul>
41 *
42 *   <li><p> The {@link #matches matches} method attempts to match the entire
43 *   input sequence against the pattern.  </p></li>
44 *
45 *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
46 *   input sequence, starting at the beginning, against the pattern.  </p></li>
47 *
48 *   <li><p> The {@link #find find} method scans the input sequence looking for
49 *   the next subsequence that matches the pattern.  </p></li>
50 *
51 * </ul>
52 *
53 * <p> Each of these methods returns a boolean indicating success or failure.
54 * More information about a successful match can be obtained by querying the
55 * state of the matcher.
56 *
57 * <p> A matcher finds matches in a subset of its input called the
58 * <i>region</i>. By default, the region contains all of the matcher's input.
59 * The region can be modified via the {@link #region region} method and queried
60 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
61 * methods. The way that the region boundaries interact with some pattern
62 * constructs can be changed. See {@link #useAnchoringBounds
63 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
64 * for more details.
65 *
66 * <p> This class also defines methods for replacing matched subsequences with
67 * new strings whose contents can, if desired, be computed from the match
68 * result.  The {@link #appendReplacement appendReplacement} and {@link
69 * #appendTail appendTail} methods can be used in tandem in order to collect
70 * the result into an existing string buffer, or the more convenient {@link
71 * #replaceAll replaceAll} method can be used to create a string in which every
72 * matching subsequence in the input sequence is replaced.
73 *
74 * <p> The explicit state of a matcher includes the start and end indices of
75 * the most recent successful match.  It also includes the start and end
76 * indices of the input subsequence captured by each <a
77 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
78 * count of such subsequences.  As a convenience, methods are also provided for
79 * returning these captured subsequences in string form.
80 *
81 * <p> The explicit state of a matcher is initially undefined; attempting to
82 * query any part of it before a successful match will cause an {@link
83 * IllegalStateException} to be thrown.  The explicit state of a matcher is
84 * recomputed by every match operation.
85 *
86 * <p> The implicit state of a matcher includes the input character sequence as
87 * well as the <i>append position</i>, which is initially zero and is updated
88 * by the {@link #appendReplacement appendReplacement} method.
89 *
90 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
91 * method or, if a new input sequence is desired, its {@link
92 * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
93 * matcher discards its explicit state information and sets the append position
94 * to zero.
95 *
96 * <p> Instances of this class are not safe for use by multiple concurrent
97 * threads. </p>
98 *
99 *
100 * @author      Mike McCloskey
101 * @author      Mark Reinhold
102 * @author      JSR-51 Expert Group
103 * @since       1.4
104 * @spec        JSR-51
105 */
106
107public final class Matcher implements MatchResult {
108    /**
109     * The Pattern this Matcher currently interprets; replaced by {@link #usePattern}.
110     */
111    // Patterns also contain cleanup code and a ReachabilitySensitive field.
112    // This ensures that "this" and pattern remain reachable while we're using pattern.address
113    // directly.
114    @ReachabilitySensitive
115    private Pattern pattern;
116
117    /**
118     * The address of the native peer, as returned by openImpl for the current pattern.
119     * Uses of this must be manually synchronized to avoid native crashes.
120     */
121    @ReachabilitySensitive
122    private long address;
123
124    /**
125     * If non-null, a Runnable that can be used to explicitly deallocate address.
     * usePattern obtains it when registering the peer and runs it before opening a new peer.
126     */
127    private Runnable nativeFinalizer;
128
    // Registry with which usePattern registers every native peer it opens.
129    private static final NativeAllocationRegistry registry = new NativeAllocationRegistry(
130            Matcher.class.getClassLoader(), getNativeFinalizer(), nativeSize());
131
132    /**
133     * Holds the original CharSequence for use in {@link #reset}. {@link #input} is used during
134     * matching. Note that CharSequence is mutable while String is not, so a reset can change
135     * the text that is matched against.
136     */
137    private CharSequence originalInput;
138
139    /**
140     * Holds the input text as a String; the group and append methods take substrings of it.
141     */
142    private String input;
143
144    /**
145     * Holds the start of the region, or 0 if the matching should start at the
146     * beginning of the text.
147     */
148    private int regionStart;
149
150    /**
151     * Holds the end of the region, or input.length() if the matching should
152     * go until the end of the input. appendTail copies input up to this offset.
153     */
154    private int regionEnd;
155
156    /**
157     * Holds the position where the next append operation will take place.
     * appendReplacement advances it to end() of the match it just processed.
158     */
159    private int appendPos;
160
161    /**
162     * Reflects whether a match has been found during the most recent match
163     * operation (matches, find or lookingAt); cleared by usePattern.
164     */
165    private boolean matchFound;
166
167    /**
168     * Holds the offsets for the most recent match. For group g, index 2*g is the start
     * offset and index 2*g + 1 the end offset; -1 marks a group that did not match.
     * usePattern sizes the array to (groupCount() + 1) * 2.
169     */
170    private int[] matchOffsets;
171
172    /**
173     * Reflects whether the bounds of the region are anchoring. Defaults to true.
174     */
175    private boolean anchoringBounds = true;
176
177    /**
178     * Reflects whether the bounds of the region are transparent.
179     */
180    private boolean transparentBounds;
181
182    /**
183     * Creates a matcher that uses {@code parent} as its pattern and {@code text} as its input.
184     */
185    Matcher(Pattern parent, CharSequence text) {
186        usePattern(parent);
187        reset(text);
188    }
189
190    /**
191     * Returns the pattern that is interpreted by this matcher.
192     *
193     * @return  The pattern for which this matcher was created
194     */
195    public Pattern pattern() {
196        return pattern;
197    }
198
199    /**
200     * Returns the match state of this matcher as a {@link MatchResult}.
201     * The result is unaffected by subsequent operations performed upon this
202     * matcher.
203     *
204     * @return  a <code>MatchResult</code> with the state of this matcher
205     * @since 1.5
206     */
207    public MatchResult toMatchResult() {
208        ensureMatch();
209        return new OffsetBasedMatchResult(input, matchOffsets);
210    }
211
212    /**
213      * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
214      * find matches with.
215      *
216      * <p> This method causes this matcher to lose information
217      * about the groups of the last match that occurred. The
218      * matcher's position in the input is maintained and its
219      * last append position is unaffected.</p>
220      *
221      * @param  newPattern
222      *         The new pattern used by this matcher
223      * @return  This matcher
224      * @throws  IllegalArgumentException
225      *          If newPattern is <tt>null</tt>
226      * @since 1.5
227      */
228    public Matcher usePattern(Pattern newPattern) {
229        if (newPattern == null) {
230            throw new IllegalArgumentException("newPattern == null");
231        }
232
233        this.pattern = newPattern;
234
235        synchronized (this) {
236            if (nativeFinalizer != null) {
237                nativeFinalizer.run();
238                address = 0; // In case openImpl throws.
239                nativeFinalizer = null;
240            }
241            address = openImpl(pattern.address);
242            nativeFinalizer = registry.registerNativeAllocation(this, address);
243        }
244
245        if (input != null) {
246            resetForInput();
247        }
248
249        matchOffsets = new int[(groupCount() + 1) * 2];
250        matchFound = false;
251        return this;
252    }
253
254    /**
255     * Returns the offset after the last character matched.
256     *
257     * @return  The offset after the last character matched
258     *
259     * @throws  IllegalStateException
260     *          If no match has yet been attempted,
261     *          or if the previous match operation failed
262     */
263    public int end() {
264        return end(0);
265    }
266
267    /**
268     * Returns the offset after the last character of the subsequence
269     * captured by the given group during the previous match operation.
270     *
271     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
272     * to right, starting at one.  Group zero denotes the entire pattern, so
273     * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
274     * <i>m.</i><tt>end()</tt>.  </p>
275     *
276     * @param  group
277     *         The index of a capturing group in this matcher's pattern
278     *
279     * @return  The offset after the last character captured by the group,
280     *          or <tt>-1</tt> if the match was successful
281     *          but the group itself did not match anything
282     *
283     * @throws  IllegalStateException
284     *          If no match has yet been attempted,
285     *          or if the previous match operation failed
286     *
287     * @throws  IndexOutOfBoundsException
288     *          If there is no capturing group in the pattern
289     *          with the given index
290     */
291    public int end(int group) {
292        ensureMatch();
293        return matchOffsets[(group * 2) + 1];
294    }
295
296    /**
297     * Returns the offset after the last character of the subsequence
298     * captured by the given <a href="Pattern.html#groupname">named-capturing
299     * group</a> during the previous match operation.
300     *
301     * @param  name
302     *         The name of a named-capturing group in this matcher's pattern
303     *
304     * @return  The offset after the last character captured by the group,
305     *          or {@code -1} if the match was successful
306     *          but the group itself did not match anything
307     *
308     * @throws  IllegalStateException
309     *          If no match has yet been attempted,
310     *          or if the previous match operation failed
311     *
312     * @throws  IllegalArgumentException
313     *          If there is no capturing group in the pattern
314     *          with the given name
315     * @since 1.8
316     */
317    public int end(String name) {
318        ensureMatch();
319        return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2 + 1];
320    }
321
322
323    /**
324     * Returns the input subsequence matched by the previous match.
325     *
326     * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
327     * the expressions <i>m.</i><tt>group()</tt> and
328     * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
329     * are equivalent.  </p>
330     *
331     * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
332     * string.  This method will return the empty string when the pattern
333     * successfully matches the empty string in the input.  </p>
334     *
335     * @return The (possibly empty) subsequence matched by the previous match,
336     *         in string form
337     *
338     * @throws  IllegalStateException
339     *          If no match has yet been attempted,
340     *          or if the previous match operation failed
341     */
342    public String group() {
343        return group(0);
344    }
345
346    /**
347     * Returns the input subsequence captured by the given group during the
348     * previous match operation.
349     *
350     * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
351     * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
352     * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
353     * are equivalent.  </p>
354     *
355     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
356     * to right, starting at one.  Group zero denotes the entire pattern, so
357     * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
358     * </p>
359     *
360     * <p> If the match was successful but the group specified failed to match
361     * any part of the input sequence, then <tt>null</tt> is returned. Note
362     * that some groups, for example <tt>(a*)</tt>, match the empty string.
363     * This method will return the empty string when such a group successfully
364     * matches the empty string in the input.  </p>
365     *
366     * @param  group
367     *         The index of a capturing group in this matcher's pattern
368     *
369     * @return  The (possibly empty) subsequence captured by the group
370     *          during the previous match, or <tt>null</tt> if the group
371     *          failed to match part of the input
372     *
373     * @throws  IllegalStateException
374     *          If no match has yet been attempted,
375     *          or if the previous match operation failed
376     *
377     * @throws  IndexOutOfBoundsException
378     *          If there is no capturing group in the pattern
379     *          with the given index
380     */
381    public String group(int group) {
382        ensureMatch();
383        int from = matchOffsets[group * 2];
384        int to = matchOffsets[(group * 2) + 1];
385        if (from == -1 || to == -1) {
386            return null;
387        } else {
388            return input.substring(from, to);
389        }
390    }
391
392    /**
393     * Returns the input subsequence captured by the given
394     * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
395     * match operation.
396     *
397     * <p> If the match was successful but the group specified failed to match
398     * any part of the input sequence, then <tt>null</tt> is returned. Note
399     * that some groups, for example <tt>(a*)</tt>, match the empty string.
400     * This method will return the empty string when such a group successfully
401     * matches the empty string in the input.  </p>
402     *
403     * @param  name
404     *         The name of a named-capturing group in this matcher's pattern
405     *
406     * @return  The (possibly empty) subsequence captured by the named group
407     *          during the previous match, or <tt>null</tt> if the group
408     *          failed to match part of the input
409     *
410     * @throws  IllegalStateException
411     *          If no match has yet been attempted,
412     *          or if the previous match operation failed
413     *
414     * @throws  IllegalArgumentException
415     *          If there is no capturing group in the pattern
416     *          with the given name
417     * @since 1.7
418     */
419    public String group(String name) {
420        ensureMatch();
421        int group = getMatchedGroupIndex(pattern.address, name);
422        int from = matchOffsets[group * 2];
423        int to = matchOffsets[(group * 2) + 1];
424        if (from == -1 || to == -1) {
425            return null;
426        } else {
427            return input.substring(from, to);
428        }
429    }
430
431    /**
432     * Returns the number of capturing groups in this matcher's pattern.
433     *
434     * <p> Group zero denotes the entire pattern by convention. It is not
435     * included in this count.
436     *
437     * <p> Any non-negative integer smaller than or equal to the value
438     * returned by this method is guaranteed to be a valid group index for
439     * this matcher.  </p>
440     *
441     * @return The number of capturing groups in this matcher's pattern
442     */
443    public int groupCount() {
444        synchronized (this) {
445            return groupCountImpl(address);
446        }
447    }
448
449    /**
450     * Attempts to match the entire region against the pattern.
451     *
452     * <p> If the match succeeds then more information can be obtained via the
453     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
454     *
455     * @return  <tt>true</tt> if, and only if, the entire region sequence
456     *          matches this matcher's pattern
457     */
458    public boolean matches() {
459        synchronized (this) {
460            matchFound = matchesImpl(address, matchOffsets);
461        }
462        return matchFound;
463    }
464
465    /**
466     * Attempts to find the next subsequence of the input sequence that matches
467     * the pattern.
468     *
469     * <p> This method starts at the beginning of this matcher's region, or, if
470     * a previous invocation of the method was successful and the matcher has
471     * not since been reset, at the first character not matched by the previous
472     * match.
473     *
474     * <p> If the match succeeds then more information can be obtained via the
475     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
476     *
477     * @return  <tt>true</tt> if, and only if, a subsequence of the input
478     *          sequence matches this matcher's pattern
479     */
480    public boolean find() {
481        synchronized (this) {
482            matchFound = findNextImpl(address, matchOffsets);
483        }
484        return matchFound;
485    }
486
487    /**
488     * Resets this matcher and then attempts to find the next subsequence of
489     * the input sequence that matches the pattern, starting at the specified
490     * index.
491     *
492     * <p> If the match succeeds then more information can be obtained via the
493     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
494     * invocations of the {@link #find()} method will start at the first
495     * character not matched by this match.  </p>
496     *
497     * @throws  IndexOutOfBoundsException
498     *          If start is less than zero or if start is greater than the
499     *          length of the input sequence.
500     *
501     * @return  <tt>true</tt> if, and only if, a subsequence of the input
502     *          sequence starting at the given index matches this matcher's
503     *          pattern
504     */
505    public boolean find(int start) {
506        reset();
507        if (start < 0 || start > input.length()) {
508            throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length());
509        }
510
511        synchronized (this) {
512            matchFound = findImpl(address, start, matchOffsets);
513        }
514        return matchFound;
515    }
516
517    /**
518     * Attempts to match the input sequence, starting at the beginning of the
519     * region, against the pattern.
520     *
521     * <p> Like the {@link #matches matches} method, this method always starts
522     * at the beginning of the region; unlike that method, it does not
523     * require that the entire region be matched.
524     *
525     * <p> If the match succeeds then more information can be obtained via the
526     * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
527     *
528     * @return  <tt>true</tt> if, and only if, a prefix of the input
529     *          sequence matches this matcher's pattern
530     */
531    public boolean lookingAt() {
532        synchronized (this) {
533            matchFound = lookingAtImpl(address, matchOffsets);
534        }
535        return matchFound;
536    }
537
538    /**
539     * Returns a literal replacement <code>String</code> for the specified
540     * <code>String</code>.
541     *
542     * This method produces a <code>String</code> that will work
543     * as a literal replacement <code>s</code> in the
544     * <code>appendReplacement</code> method of the {@link Matcher} class.
545     * The <code>String</code> produced will match the sequence of characters
546     * in <code>s</code> treated as a literal sequence. Backslashes ('\') and
547     * dollar signs ('$') will be given no special meaning.
548     *
549     * @param  s The string to be literalized
550     * @return  A literal string replacement
551     * @since 1.5
552     */
553    public static String quoteReplacement(String s) {
554        if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
555            return s;
556        StringBuilder sb = new StringBuilder();
557        for (int i=0; i<s.length(); i++) {
558            char c = s.charAt(i);
559            if (c == '\\' || c == '$') {
560                sb.append('\\');
561            }
562            sb.append(c);
563        }
564        return sb.toString();
565    }
566
567    /**
568     * Implements a non-terminal append-and-replace step.
569     *
570     * <p> This method performs the following actions: </p>
571     *
572     * <ol>
573     *
574     *   <li><p> It reads characters from the input sequence, starting at the
575     *   append position, and appends them to the given string buffer.  It
576     *   stops after reading the last character preceding the previous match,
577     *   that is, the character at index {@link
578     *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
579     *
580     *   <li><p> It appends the given replacement string to the string buffer.
581     *   </p></li>
582     *
583     *   <li><p> It sets the append position of this matcher to the index of
584     *   the last character matched, plus one, that is, to {@link #end()}.
585     *   </p></li>
586     *
587     * </ol>
588     *
589     * <p> The replacement string may contain references to subsequences
590     * captured during the previous match: Each occurrence of
591     * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i> will be replaced by the result of evaluating the
592     * corresponding {@link #group(String) group(name)} or {@link #group(int) group(g)} respectively. For <tt>$</tt><i>g</i>,
593     * the first number after the <tt>$</tt> is always treated as part of
594     * the group reference. Subsequent numbers are incorporated into g if
595     * they would form a legal group reference. Only the numerals '0'
596     * through '9' are considered as potential components of the group
597     * reference. If the second group matched the string <tt>"foo"</tt>, for
598     * example, then passing the replacement string <tt>"$2bar"</tt> would
599     * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
600     * sign (<tt>$</tt>) may be included as a literal in the replacement
601     * string by preceding it with a backslash (<tt>\$</tt>).
602     *
603     * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
604     * the replacement string may cause the results to be different than if it
605     * were being treated as a literal replacement string. Dollar signs may be
606     * treated as references to captured subsequences as described above, and
607     * backslashes are used to escape literal characters in the replacement
608     * string.
609     *
610     * <p> This method is intended to be used in a loop together with the
611     * {@link #appendTail appendTail} and {@link #find find} methods.  The
612     * following code, for example, writes <tt>one dog two dogs in the
613     * yard</tt> to the standard-output stream: </p>
614     *
615     * <blockquote><pre>
616     * Pattern p = Pattern.compile("cat");
617     * Matcher m = p.matcher("one cat two cats in the yard");
618     * StringBuffer sb = new StringBuffer();
619     * while (m.find()) {
620     *     m.appendReplacement(sb, "dog");
621     * }
622     * m.appendTail(sb);
623     * System.out.println(sb.toString());</pre></blockquote>
624     *
625     * @param  sb
626     *         The target string buffer
627     *
628     * @param  replacement
629     *         The replacement string
630     *
631     * @return  This matcher
632     *
633     * @throws  IllegalStateException
634     *          If no match has yet been attempted,
635     *          or if the previous match operation failed
636     *
637     * @throws  IllegalArgumentException
638     *          If the replacement string refers to a named-capturing
639     *          group that does not exist in the pattern
640     *
641     * @throws  IndexOutOfBoundsException
642     *          If the replacement string refers to a capturing group
643     *          that does not exist in the pattern
644     */
645    public Matcher appendReplacement(StringBuffer sb, String replacement) {
646        sb.append(input.substring(appendPos, start()));
647        appendEvaluated(sb, replacement);
648        appendPos = end();
649
650        return this;
651    }
652
653    /**
654     * Internal helper method to append a given string to a given string buffer.
655     * If the string contains any references to groups, these are replaced by
656     * the corresponding group's contents.
657     *
658     * @param buffer the string buffer.
659     * @param s the string to append.
660     */
661    private void appendEvaluated(StringBuffer buffer, String s) {
662        boolean escape = false;
663        boolean dollar = false;
664        boolean escapeNamedGroup = false;
665        int escapeNamedGroupStart = -1;
666
667        for (int i = 0; i < s.length(); i++) {
668            char c = s.charAt(i);
669            if (c == '\\' && !escape) {
670                escape = true;
671            } else if (c == '$' && !escape) {
672                dollar = true;
673            } else if (c >= '0' && c <= '9' && dollar) {
674                buffer.append(group(c - '0'));
675                dollar = false;
676            } else if (c == '{' && dollar) {
677                escapeNamedGroup = true;
678                escapeNamedGroupStart = i;
679            } else if (c == '}' && dollar && escapeNamedGroup) {
680                String namedGroupName =
681                    s.substring(escapeNamedGroupStart + 1, i);
682                buffer.append(group(namedGroupName));
683                dollar = false;
684                escapeNamedGroup = false;
685            } else if (c != '}' && dollar && escapeNamedGroup) {
686                continue;
687            } else {
688                buffer.append(c);
689                dollar = false;
690                escape = false;
691                escapeNamedGroup = false;
692            }
693        }
694
695        if (escapeNamedGroup) {
696            throw new IllegalArgumentException("Missing ending brace '}' from replacement string");
697        }
698
699        if (escape) {
700            throw new ArrayIndexOutOfBoundsException(s.length());
701        }
702    }
703
704
705    /**
706     * Implements a terminal append-and-replace step.
707     *
708     * <p> This method reads characters from the input sequence, starting at
709     * the append position, and appends them to the given string buffer.  It is
710     * intended to be invoked after one or more invocations of the {@link
711     * #appendReplacement appendReplacement} method in order to copy the
712     * remainder of the input sequence.  </p>
713     *
714     * @param  sb
715     *         The target string buffer
716     *
717     * @return  The target string buffer
718     */
719    public StringBuffer appendTail(StringBuffer sb) {
720        if (appendPos < regionEnd) {
721            sb.append(input.substring(appendPos, regionEnd));
722        }
723        return sb;
724    }
725
726    /**
727     * Replaces every subsequence of the input sequence that matches the
728     * pattern with the given replacement string.
729     *
730     * <p> This method first resets this matcher.  It then scans the input
731     * sequence looking for matches of the pattern.  Characters that are not
732     * part of any match are appended directly to the result string; each match
733     * is replaced in the result by the replacement string.  The replacement
734     * string may contain references to captured subsequences as in the {@link
735     * #appendReplacement appendReplacement} method.
736     *
737     * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
738     * the replacement string may cause the results to be different than if it
739     * were being treated as a literal replacement string. Dollar signs may be
740     * treated as references to captured subsequences as described above, and
741     * backslashes are used to escape literal characters in the replacement
742     * string.
743     *
744     * <p> Given the regular expression <tt>a*b</tt>, the input
745     * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
746     * <tt>"-"</tt>, an invocation of this method on a matcher for that
747     * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
748     *
749     * <p> Invoking this method changes this matcher's state.  If the matcher
750     * is to be used in further matching operations then it should first be
751     * reset.  </p>
752     *
753     * @param  replacement
754     *         The replacement string
755     *
756     * @return  The string constructed by replacing each matching subsequence
757     *          by the replacement string, substituting captured subsequences
758     *          as needed
759     */
760    public String replaceAll(String replacement) {
761        reset();
762        StringBuffer buffer = new StringBuffer(input.length());
763        while (find()) {
764            appendReplacement(buffer, replacement);
765        }
766        return appendTail(buffer).toString();
767    }
768
769    /**
770     * Replaces the first subsequence of the input sequence that matches the
771     * pattern with the given replacement string.
772     *
773     * <p> This method first resets this matcher.  It then scans the input
774     * sequence looking for a match of the pattern.  Characters that are not
775     * part of the match are appended directly to the result string; the match
776     * is replaced in the result by the replacement string.  The replacement
777     * string may contain references to captured subsequences as in the {@link
778     * #appendReplacement appendReplacement} method.
779     *
780     * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
781     * the replacement string may cause the results to be different than if it
782     * were being treated as a literal replacement string. Dollar signs may be
783     * treated as references to captured subsequences as described above, and
784     * backslashes are used to escape literal characters in the replacement
785     * string.
786     *
787     * <p> Given the regular expression <tt>dog</tt>, the input
788     * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
789     * <tt>"cat"</tt>, an invocation of this method on a matcher for that
790     * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
791     *
792     * <p> Invoking this method changes this matcher's state.  If the matcher
793     * is to be used in further matching operations then it should first be
794     * reset.  </p>
795     *
796     * @param  replacement
797     *         The replacement string
798     * @return  The string constructed by replacing the first matching
799     *          subsequence by the replacement string, substituting captured
800     *          subsequences as needed
801     */
802    public String replaceFirst(String replacement) {
803        reset();
804        StringBuffer buffer = new StringBuffer(input.length());
805        if (find()) {
806            appendReplacement(buffer, replacement);
807        }
808        return appendTail(buffer).toString();
809    }
810
811    /**
812     * Sets the limits of this matcher's region. The region is the part of the
813     * input sequence that will be searched to find a match. Invoking this
814     * method resets the matcher, and then sets the region to start at the
815     * index specified by the <code>start</code> parameter and end at the
816     * index specified by the <code>end</code> parameter.
817     *
818     * <p>Depending on the transparency and anchoring being used (see
819     * {@link #useTransparentBounds useTransparentBounds} and
820     * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
821     * as anchors may behave differently at or around the boundaries of the
822     * region.
823     *
824     * @param  start
825     *         The index to start searching at (inclusive)
826     * @param  end
827     *         The index to end searching at (exclusive)
828     * @throws  IndexOutOfBoundsException
829     *          If start or end is less than zero, if
830     *          start is greater than the length of the input sequence, if
831     *          end is greater than the length of the input sequence, or if
832     *          start is greater than end.
833     * @return  this matcher
834     * @since 1.5
835     */
836    public Matcher region(int start, int end) {
837        return reset(originalInput, start, end);
838    }
839
840    /**
841     * Reports the start index of this matcher's region. The
842     * searches this matcher conducts are limited to finding matches
843     * within {@link #regionStart regionStart} (inclusive) and
844     * {@link #regionEnd regionEnd} (exclusive).
845     *
846     * @return  The starting point of this matcher's region
847     * @since 1.5
848     */
849    public int regionStart() {
850        return regionStart;
851    }
852
853    /**
854     * Reports the end index (exclusive) of this matcher's region.
855     * The searches this matcher conducts are limited to finding matches
856     * within {@link #regionStart regionStart} (inclusive) and
857     * {@link #regionEnd regionEnd} (exclusive).
858     *
859     * @return  the ending point of this matcher's region
860     * @since 1.5
861     */
862    public int regionEnd() {
863        return regionEnd;
864    }
865
866    /**
867     * Queries the transparency of region bounds for this matcher.
868     *
869     * <p> This method returns <tt>true</tt> if this matcher uses
870     * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
871     * bounds.
872     *
873     * <p> See {@link #useTransparentBounds useTransparentBounds} for a
874     * description of transparent and opaque bounds.
875     *
876     * <p> By default, a matcher uses opaque region boundaries.
877     *
878     * @return <tt>true</tt> iff this matcher is using transparent bounds,
879     *         <tt>false</tt> otherwise.
880     * @see java.util.regex.Matcher#useTransparentBounds(boolean)
881     * @since 1.5
882     */
883    public boolean hasTransparentBounds() {
884        return transparentBounds;
885    }
886
887    /**
888     * Sets the transparency of region bounds for this matcher.
889     *
890     * <p> Invoking this method with an argument of <tt>true</tt> will set this
891     * matcher to use <i>transparent</i> bounds. If the boolean
892     * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
893     *
894     * <p> Using transparent bounds, the boundaries of this
895     * matcher's region are transparent to lookahead, lookbehind,
896     * and boundary matching constructs. Those constructs can see beyond the
897     * boundaries of the region to see if a match is appropriate.
898     *
899     * <p> Using opaque bounds, the boundaries of this matcher's
900     * region are opaque to lookahead, lookbehind, and boundary matching
901     * constructs that may try to see beyond them. Those constructs cannot
902     * look past the boundaries so they will fail to match anything outside
903     * of the region.
904     *
905     * <p> By default, a matcher uses opaque bounds.
906     *
907     * @param  value a boolean indicating whether to use opaque or transparent
908     *         regions
909     * @return this matcher
910     * @see java.util.regex.Matcher#hasTransparentBounds
911     * @since 1.5
912     */
913    public Matcher useTransparentBounds(boolean value) {
914        synchronized (this) {
915            transparentBounds = value;
916            useTransparentBoundsImpl(address, value);
917        }
918        return this;
919    }
920
921    /**
922     * Queries the anchoring of region bounds for this matcher.
923     *
924     * <p> This method returns <tt>true</tt> if this matcher uses
925     * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
926     *
927     * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
928     * description of anchoring bounds.
929     *
930     * <p> By default, a matcher uses anchoring region boundaries.
931     *
932     * @return <tt>true</tt> iff this matcher is using anchoring bounds,
933     *         <tt>false</tt> otherwise.
934     * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
935     * @since 1.5
936     */
937    public boolean hasAnchoringBounds() {
938        return anchoringBounds;
939    }
940
941    /**
942     * Sets the anchoring of region bounds for this matcher.
943     *
944     * <p> Invoking this method with an argument of <tt>true</tt> will set this
945     * matcher to use <i>anchoring</i> bounds. If the boolean
946     * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
947     * used.
948     *
949     * <p> Using anchoring bounds, the boundaries of this
950     * matcher's region match anchors such as ^ and $.
951     *
952     * <p> Without anchoring bounds, the boundaries of this
953     * matcher's region will not match anchors such as ^ and $.
954     *
955     * <p> By default, a matcher uses anchoring region boundaries.
956     *
957     * @param  value a boolean indicating whether or not to use anchoring bounds.
958     * @return this matcher
959     * @see java.util.regex.Matcher#hasAnchoringBounds
960     * @since 1.5
961     */
962    public Matcher useAnchoringBounds(boolean value) {
963        synchronized (this) {
964            anchoringBounds = value;
965            useAnchoringBoundsImpl(address, value);
966        }
967        return this;
968    }
969
970    /**
971     * <p>Returns the string representation of this matcher. The
972     * string representation of a <code>Matcher</code> contains information
973     * that may be useful for debugging. The exact format is unspecified.
974     *
975     * @return  The string representation of this matcher
976     * @since 1.5
977     */
978    public String toString() {
979        StringBuilder sb = new StringBuilder();
980        sb.append("java.util.regex.Matcher");
981        sb.append("[pattern=" + pattern());
982        sb.append(" region=");
983        sb.append(regionStart() + "," + regionEnd());
984        sb.append(" lastmatch=");
985        if (matchFound && (group() != null)) {
986            sb.append(group());
987        }
988        sb.append("]");
989        return sb.toString();
990    }
991
992    /**
993     * <p>Returns true if the end of input was hit by the search engine in
994     * the last match operation performed by this matcher.
995     *
996     * <p>When this method returns true, then it is possible that more input
997     * would have changed the result of the last search.
998     *
999     * @return  true iff the end of input was hit in the last match; false
1000     *          otherwise
1001     * @since 1.5
1002     */
1003    public boolean hitEnd() {
1004        synchronized (this) {
1005            return hitEndImpl(address);
1006        }
1007    }
1008
1009
1010    /**
1011     * <p>Returns true if more input could change a positive match into a
1012     * negative one.
1013     *
1014     * <p>If this method returns true, and a match was found, then more
1015     * input could cause the match to be lost. If this method returns false
1016     * and a match was found, then more input might change the match but the
1017     * match won't be lost. If a match was not found, then requireEnd has no
1018     * meaning.
1019     *
1020     * @return  true iff more input could change a positive match into a
1021     *          negative one.
1022     * @since 1.5
1023     */
1024    public boolean requireEnd() {
1025        synchronized (this) {
1026            return requireEndImpl(address);
1027        }
1028    }
1029
1030    /**
1031     * Resets this matcher.
1032     *
1033     * <p> Resetting a matcher discards all of its explicit state information
1034     * and sets its append position to zero. The matcher's region is set to the
1035     * default region, which is its entire character sequence. The anchoring
1036     * and transparency of this matcher's region boundaries are unaffected.
1037     *
1038     * @return  This matcher
1039     */
1040    public Matcher reset() {
1041        return reset(originalInput, 0, originalInput.length());
1042    }
1043
1044    /**
1045     * Resets this matcher with a new input sequence.
1046     *
1047     * <p> Resetting a matcher discards all of its explicit state information
1048     * and sets its append position to zero.  The matcher's region is set to
1049     * the default region, which is its entire character sequence.  The
1050     * anchoring and transparency of this matcher's region boundaries are
1051     * unaffected.
1052     *
1053     * @param  input
1054     *         The new input character sequence
1055     *
1056     * @return  This matcher
1057     */
1058    public Matcher reset(CharSequence input) {
1059        return reset(input, 0, input.length());
1060    }
1061
1062    /**
1063     * Resets the Matcher. A new input sequence and a new region can be
1064     * specified. Results of a previous find get lost. The next attempt to find
1065     * an occurrence of the Pattern in the string will start at the beginning of
1066     * the region. This is the internal version of reset() to which the several
1067     * public versions delegate.
1068     *
1069     * @param input
1070     *            the input sequence.
1071     * @param start
1072     *            the start of the region.
1073     * @param end
1074     *            the end of the region.
1075     *
1076     * @return the matcher itself.
1077     */
1078    private Matcher reset(CharSequence input, int start, int end) {
1079        if (input == null) {
1080            throw new IllegalArgumentException("input == null");
1081        }
1082
1083        if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
1084            throw new IndexOutOfBoundsException();
1085        }
1086
1087        this.originalInput = input;
1088        this.input = input.toString();
1089        this.regionStart = start;
1090        this.regionEnd = end;
1091        resetForInput();
1092
1093        matchFound = false;
1094        appendPos = 0;
1095
1096        return this;
1097    }
1098
    /**
     * Passes the current input string and region bounds to the native
     * matcher, then re-applies the anchoring and transparency flags held on
     * the Java side. All three native calls run while holding this
     * matcher's monitor.
     */
    private void resetForInput() {
        synchronized (this) {
            setInputImpl(address, input, regionStart, regionEnd);
            // NOTE(review): the flags are presumably re-sent because setting new
            // input resets them on the native side; confirm against the native code.
            useAnchoringBoundsImpl(address, anchoringBounds);
            useTransparentBoundsImpl(address, transparentBounds);
        }
    }
1106
1107    /**
1108     * Makes sure that a successful match has been made. Is invoked internally
1109     * from various places in the class.
1110     *
1111     * @throws IllegalStateException
1112     *             if no successful match has been made.
1113     */
1114    private void ensureMatch() {
1115        if (!matchFound) {
1116            throw new IllegalStateException("No successful match so far");
1117        }
1118    }
1119
1120    /**
1121     * Returns the start index of the previous match.  </p>
1122     *
1123     * @return  The index of the first character matched
1124     *
1125     * @throws  IllegalStateException
1126     *          If no match has yet been attempted,
1127     *          or if the previous match operation failed
1128     */
1129    public int start() {
1130        return start(0);
1131    }
1132
1133    /**
1134     * Returns the start index of the subsequence captured by the given group
1135     * during the previous match operation.
1136     *
1137     * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
1138     * to right, starting at one.  Group zero denotes the entire pattern, so
1139     * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
1140     * <i>m.</i><tt>start()</tt>.  </p>
1141     *
1142     * @param  group
1143     *         The index of a capturing group in this matcher's pattern
1144     *
1145     * @return  The index of the first character captured by the group,
1146     *          or <tt>-1</tt> if the match was successful but the group
1147     *          itself did not match anything
1148     *
1149     * @throws  IllegalStateException
1150     *          If no match has yet been attempted,
1151     *          or if the previous match operation failed
1152     *
1153     * @throws  IndexOutOfBoundsException
1154     *          If there is no capturing group in the pattern
1155     *          with the given index
1156     */
1157    public int start(int group) throws IllegalStateException {
1158        ensureMatch();
1159        return matchOffsets[group * 2];
1160    }
1161
1162
1163    /**
1164     * Returns the start index of the subsequence captured by the given
1165     * <a href="Pattern.html#groupname">named-capturing group</a> during the
1166     * previous match operation.
1167     *
1168     * @param  name
1169     *         The name of a named-capturing group in this matcher's pattern
1170     *
1171     * @return  The index of the first character captured by the group,
1172     *          or {@code -1} if the match was successful but the group
1173     *          itself did not match anything
1174     *
1175     * @throws  IllegalStateException
1176     *          If no match has yet been attempted,
1177     *          or if the previous match operation failed
1178     *
1179     * @throws  IllegalArgumentException
1180     *          If there is no capturing group in the pattern
1181     *          with the given name
1182     * @since 1.8
1183     */
1184    public int start(String name) {
1185        ensureMatch();
1186        return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2];
1187    }
1188
1189    private static int getMatchedGroupIndex(long patternAddr, String name) {
1190        int result = getMatchedGroupIndex0(patternAddr, name);
1191        if (result < 0) {
1192            throw new IllegalArgumentException("No capturing group in the pattern " +
1193                                               "with the name " + name);
1194        }
1195        return result;
1196    }
1197
    // Native method declarations; their bodies are not part of this file.
    // Within the visible part of this class, callers pass the matcher's
    // "address" field as "addr" and "pattern.address" as "patternAddr".
    private static native int getMatchedGroupIndex0(long patternAddr, String name);
    private static native boolean findImpl(long addr, int startIndex, int[] offsets);
    private static native boolean findNextImpl(long addr, int[] offsets);
    private static native long getNativeFinalizer();
    private static native int groupCountImpl(long addr);
    private static native boolean hitEndImpl(long addr);
    private static native boolean lookingAtImpl(long addr, int[] offsets);
    private static native boolean matchesImpl(long addr, int[] offsets);
    private static native int nativeSize();
    private static native long openImpl(long patternAddr);
    private static native boolean requireEndImpl(long addr);
    private static native void setInputImpl(long addr, String s, int start, int end);
    private static native void useAnchoringBoundsImpl(long addr, boolean value);
    private static native void useTransparentBoundsImpl(long addr, boolean value);
1212
1213    /**
1214     * A trivial match result implementation that's based on an array of integers
1215     * representing match offsets. The array is of the form
1216     * {@code { start1, end1, start2, end2 ....}) where each consecutive pair of elements represents
1217     * the start and end of a match respectively.
1218     */
1219    static final class OffsetBasedMatchResult implements MatchResult {
1220        private final String input;
1221        private final int[] offsets;
1222
1223        OffsetBasedMatchResult(String input, int[] offsets) {
1224            this.input = input;
1225            this.offsets = offsets.clone();
1226        }
1227
1228        @Override
1229        public int start() {
1230            return start(0);
1231        }
1232
1233        @Override
1234        public int start(int group) {
1235            return offsets[2 * group];
1236        }
1237
1238        @Override
1239        public int end() {
1240            return end(0);
1241        }
1242
1243        @Override
1244        public int end(int group) {
1245            return offsets[2 * group + 1];
1246        }
1247
1248        @Override
1249        public String group() {
1250            return group(0);
1251        }
1252
1253        @Override
1254        public String group(int group) {
1255            final int start = start(group);
1256            final int end = end(group);
1257            if (start == -1 || end == -1) {
1258                return null;
1259            }
1260
1261            return input.substring(start, end);
1262        }
1263
1264        @Override
1265        public int groupCount() {
1266            return (offsets.length / 2) - 1;
1267        }
1268    }
1269}
1270