1fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project/*
2fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * Copyright (C) 2007 The Android Open Source Project
3fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project *
4fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License");
5fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * you may not use this file except in compliance with the License.
6fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * You may obtain a copy of the License at
7fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project *
8fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project *      http://www.apache.org/licenses/LICENSE-2.0
9fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project *
10fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * Unless required by applicable law or agreed to in writing, software
11fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS,
12fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * See the License for the specific language governing permissions and
14fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * limitations under the License.
15fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */
16fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
17fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectpackage java.util.regex;
18fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
195f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughesimport java.io.IOException;
205f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughesimport java.io.ObjectInputStream;
21fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectimport java.io.Serializable;
22fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
23fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project/**
245f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Patterns are compiled regular expressions. In many cases, convenience methods such as
255f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * {@link String#matches String.matches}, {@link String#replaceAll String.replaceAll} and
265f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * {@link String#split String.split} will be preferable, but if you need to do a lot of work
275f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * with the same regular expression, it may be more efficient to compile it once and reuse it.
281c76910f0a0fb0cb761a4505f3be1204d6be012bElliott Hughes * The {@code Pattern} class and its companion, {@link Matcher}, also offer more functionality
291c76910f0a0fb0cb761a4505f3be1204d6be012bElliott Hughes * than the small amount exposed by {@code String}.
30f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
31fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * <pre>
325f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * // String convenience methods:
331c76910f0a0fb0cb761a4505f3be1204d6be012bElliott Hughes * boolean sawFailures = s.matches("Failures: \\d+");
341c76910f0a0fb0cb761a4505f3be1204d6be012bElliott Hughes * String farewell = s.replaceAll("Hello, (\\S+)", "Goodbye, $1");
355f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * String[] fields = s.split(":");
36f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
375f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * // Direct use of Pattern:
381c76910f0a0fb0cb761a4505f3be1204d6be012bElliott Hughes * Pattern p = Pattern.compile("Hello, (\\S+)");
395f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Matcher m = p.matcher(inputString);
405f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * while (m.find()) { // Find each match in turn; String can't do this.
415f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes *     String name = m.group(1); // Access a submatch group; String can't do this.
425f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * }
43fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * </pre>
44f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
455f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Regular expression syntax</h3>
465f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <span class="datatable">
475f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <style type="text/css">
485f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * .datatable td { padding-right: 20px; }
495f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </style>
50f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
515f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Java supports a subset of Perl 5 regular expression syntax. An important gotcha is that Java
525f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * has no regular expression literals, and uses plain old string literals instead. This means that
535f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * you need an extra level of escaping. For example, the regular expression {@code \s+} has to
545f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * be represented as the string {@code "\\s+"}.
55f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
565f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Escape sequences</h3>
575f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table>
585f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \ </td> <td>Quote the following metacharacter (so {@code \.} matches a literal {@code .}).</td> </tr>
595f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \Q </td> <td>Quote all following metacharacters until {@code \E}.</td> </tr>
605f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \E </td> <td>Stop quoting metacharacters (started by {@code \Q}).</td> </tr>
615f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \\ </td> <td>A literal backslash.</td> </tr>
62069774ea75c32440ad39f54c9a77d0eda6b20b53Elliott Hughes * <tr> <td> &#x005c;u<i>hhhh</i> </td> <td>The Unicode character U+hhhh (in hex).</td> </tr>
63069774ea75c32440ad39f54c9a77d0eda6b20b53Elliott Hughes * <tr> <td> &#x005c;x<i>hh</i> </td> <td>The Unicode character U+00hh (in hex).</td> </tr>
64069774ea75c32440ad39f54c9a77d0eda6b20b53Elliott Hughes * <tr> <td> \c<i>x</i> </td> <td>The ASCII control character ^x (so {@code \cH} would be ^H, U+0008).</td> </tr>
65f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
665f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \a </td> <td>The ASCII bell character (U+0007).</td> </tr>
675f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \e </td> <td>The ASCII ESC character (U+001b).</td> </tr>
685f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \f </td> <td>The ASCII form feed character (U+000c).</td> </tr>
695f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \n </td> <td>The ASCII newline character (U+000a).</td> </tr>
705f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \r </td> <td>The ASCII carriage return character (U+000d).</td> </tr>
715f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \t </td> <td>The ASCII tab character (U+0009).</td> </tr>
725f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
73f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
745f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Character classes</h3>
755f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>It's possible to construct arbitrary character classes using set operations:
765f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table>
775f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [abc] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Enumeration.)</td> </tr>
785f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [a-c] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Range.)</td> </tr>
795f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [^abc] </td> <td>Any character <i>except</i> {@code a}, {@code b}, or {@code c}. (Negation.)</td> </tr>
805f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [[a-f][0-9]] </td> <td>Any character in either range. (Union.)</td> </tr>
815f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [[a-z]&amp;&amp;[jkl]] </td> <td>Any character in both ranges. (Intersection.)</td> </tr>
825f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
835f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Most of the time, the built-in character classes are more useful:
845f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table>
85953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <tr> <td> \d </td> <td>Any digit character (see note below).</td> </tr>
86953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <tr> <td> \D </td> <td>Any non-digit character (see note below).</td> </tr>
87953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <tr> <td> \s </td> <td>Any whitespace character (see note below).</td> </tr>
88953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <tr> <td> \S </td> <td>Any non-whitespace character (see note below).</td> </tr>
89953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <tr> <td> \w </td> <td>Any word character (see note below).</td> </tr>
90953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <tr> <td> \W </td> <td>Any non-word character (see note below).</td> </tr>
915f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \p{<i>NAME</i>} </td> <td> Any character in the class with the given <i>NAME</i>. </td> </tr>
925f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \P{<i>NAME</i>} </td> <td> Any character <i>not</i> in the named class. </td> </tr>
935f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
94953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <p>Note that these built-in classes don't just cover the traditional ASCII range. For example,
95953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <code>\w</code> is equivalent to the character class <code>[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}]</code>.
96953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * For more details see <a href="http://www.unicode.org/reports/tr18/#Compatibility_Properties">Unicode TR-18</a>,
97953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * and bear in mind that the set of characters in each class can vary between Unicode releases.
98953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * If you actually want to match only ASCII characters, specify the explicit characters you want;
99953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * if you mean 0-9 use <code>[0-9]</code> rather than <code>\d</code>, which would also include
100953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * Gurmukhi digits and so forth.
101953dfe37db00b2610807c3f895c72183c7ce5b14Elliott Hughes * <p>There are also a variety of named classes:
1025f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <ul>
1035f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li><a href="../../lang/Character.html#unicode_categories">Unicode category names</a>,
1045f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * prefixed by {@code Is}. For example {@code \p{IsLu}} for all uppercase letters.
1055f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li>POSIX class names. These are 'Alnum', 'Alpha', 'ASCII', 'Blank', 'Cntrl', 'Digit',
1065f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * 'Graph', 'Lower', 'Print', 'Punct', 'Upper', 'XDigit'.
10735edbd5fca5f4180fe7a57101a38ea01a2d9cc62Elliott Hughes * <li>Unicode block names, as accepted as input to {@link java.lang.Character.UnicodeBlock#forName},
10835edbd5fca5f4180fe7a57101a38ea01a2d9cc62Elliott Hughes * prefixed by {@code In}. For example {@code \p{InHebrew}} for all characters in the Hebrew block.
1095f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li>Character method names. These are all non-deprecated methods from {@link java.lang.Character}
1105f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * whose name starts with {@code is}, but with the {@code is} replaced by {@code java}.
1115f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * For example, {@code \p{javaLowerCase}}.
1125f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </ul>
113f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1145f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Quantifiers</h3>
1155f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Quantifiers match some number of instances of the preceding regular expression.
1165f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table>
1175f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> * </td> <td>Zero or more.</td> </tr>
1185f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ? </td> <td>Zero or one.</td> </tr>
1195f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> + </td> <td>One or more.</td> </tr>
1205f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>} </td> <td>Exactly <i>n</i>.</td> </tr>
1215f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n,</i>} </td> <td>At least <i>n</i>.</td> </tr>
1225f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>,<i>m</i>} </td> <td>At least <i>n</i> but not more than <i>m</i>.</td> </tr>
1235f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
1245f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Quantifiers are "greedy" by default, meaning that they will match the longest possible input
1255f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * sequence. There are also non-greedy quantifiers that match the shortest possible input sequence.
1265f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * They're the same as the greedy ones but with a trailing {@code ?}:
1275f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table>
1285f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> *? </td> <td>Zero or more (non-greedy).</td> </tr>
1295f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ?? </td> <td>Zero or one (non-greedy).</td> </tr>
1305f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> +? </td> <td>One or more (non-greedy).</td> </tr>
1315f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>}? </td> <td>Exactly <i>n</i> (non-greedy).</td> </tr>
1325f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n,</i>}? </td> <td>At least <i>n</i> (non-greedy).</td> </tr>
1335f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>,<i>m</i>}? </td> <td>At least <i>n</i> but not more than <i>m</i> (non-greedy).</td> </tr>
1345f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
1355f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Quantifiers allow backtracking by default. There are also possessive quantifiers to prevent
1365f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * backtracking. They're the same as the greedy ones but with a trailing {@code +}:
1375f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table>
1385f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> *+ </td> <td>Zero or more (possessive).</td> </tr>
1395f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ?+ </td> <td>Zero or one (possessive).</td> </tr>
1405f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ++ </td> <td>One or more (possessive).</td> </tr>
1415f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>}+ </td> <td>Exactly <i>n</i> (possessive).</td> </tr>
1425f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n,</i>}+ </td> <td>At least <i>n</i> (possessive).</td> </tr>
1435f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>,<i>m</i>}+ </td> <td>At least <i>n</i> but not more than <i>m</i> (possessive).</td> </tr>
1445f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
145f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1465f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Zero-width assertions</h3>
1475f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table>
1485f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ^ </td> <td>At beginning of line.</td> </tr>
1495f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> $ </td> <td>At end of line.</td> </tr>
1505f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \A </td> <td>At beginning of input.</td> </tr>
1515f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \b </td> <td>At word boundary.</td> </tr>
1525f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \B </td> <td>At non-word boundary.</td> </tr>
1535f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \G </td> <td>At end of previous match.</td> </tr>
1545f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \z </td> <td>At end of input.</td> </tr>
1555f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \Z </td> <td>At end of input, or before newline at end.</td> </tr>
1565f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
157f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1585f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Look-around assertions</h3>
1595f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Look-around assertions assert that the subpattern does (positive) or doesn't (negative) match
1605f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * after (look-ahead) or before (look-behind) the current position, without including the matched
1615f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * text in the containing match. The maximum length of possible matches for look-behind patterns
1625f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * must not be unbounded.
1635f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table>
1645f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?=<i>a</i>) </td> <td>Zero-width positive look-ahead.</td> </tr>
1655f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?!<i>a</i>) </td> <td>Zero-width negative look-ahead.</td> </tr>
1665f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?&lt;=<i>a</i>) </td> <td>Zero-width positive look-behind.</td> </tr>
1675f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?&lt;!<i>a</i>) </td> <td>Zero-width negative look-behind.</td> </tr>
1685f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
169f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1705f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Groups</h3>
171f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1725f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table>
1735f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (<i>a</i>) </td> <td>A capturing group.</td> </tr>
1745f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?:<i>a</i>) </td> <td>A non-capturing group.</td> </tr>
1755f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?&gt;<i>a</i>) </td> <td>An independent non-capturing group. (The first match of the subgroup is the only match tried.)</td> </tr>
1765f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \<i>n</i> </td> <td>The text already matched by capturing group <i>n</i>.</td> </tr>
1775f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
1785850271b4ab93ebc27c1d49169a348c6be3c7f04Elliott Hughes * <p>See {@link Matcher#group} for details of how capturing groups are numbered and accessed.
179f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1805f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Operators</h3>
1815f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table>
1825f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> <i>ab</i> </td> <td>Expression <i>a</i> followed by expression <i>b</i>.</td> </tr>
1835f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> <i>a</i>|<i>b</i> </td> <td>Either expression <i>a</i> or expression <i>b</i>.</td> </tr>
1845f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
185f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1865f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <a name="flags"><h3>Flags</h3></a>
1875f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table>
1885f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?dimsux-dimsux:<i>a</i>) </td> <td>Evaluates the expression <i>a</i> with the given flags enabled/disabled.</td> </tr>
1895f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?dimsux-dimsux) </td> <td>Evaluates the rest of the pattern with the given flags enabled/disabled.</td> </tr>
1905f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
191f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
1925f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>The flags are:
1935f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table>
1945f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code i}</td> <td>{@link #CASE_INSENSITIVE}</td> <td>case insensitive matching</td></tr>
1955f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code d}</td> <td>{@link #UNIX_LINES}</td>       <td>only accept {@code '\n'} as a line terminator</td></tr>
1965f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code m}</td> <td>{@link #MULTILINE}</td>        <td>allow {@code ^} and {@code $} to match beginning/end of any line</td></tr>
1975f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code s}</td> <td>{@link #DOTALL}</td>           <td>allow {@code .} to match {@code '\n'} ("s" for "single line")</td></tr>
1985f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code u}</td> <td>{@link #UNICODE_CASE}</td>     <td>enable Unicode case folding</td></tr>
1995f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code x}</td> <td>{@link #COMMENTS}</td>         <td>allow whitespace and comments</td></tr>
2005f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table>
2015f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Either set of flags may be empty. For example, {@code (?i-m)} would turn on case-insensitivity
2025f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * and turn off multiline mode, {@code (?i)} would just turn on case-insensitivity,
2035f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * and {@code (?-m)} would just turn off multiline mode.
2045f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Note that on Android, {@code UNICODE_CASE} is always on: case-insensitive matching will
2055f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * always be Unicode-aware.
2065f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>There are two other flags not settable via this mechanism: {@link #CANON_EQ} and
2075f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * {@link #LITERAL}. Attempts to use {@link #CANON_EQ} on Android will throw an exception.
2085f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </span>
209f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
2105f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Implementation notes</h3>
211f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
2121c76910f0a0fb0cb761a4505f3be1204d6be012bElliott Hughes * <p>The regular expression implementation used in Android is provided by
2135f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <a href="http://www.icu-project.org">ICU</a>. The notation for the regular
2145f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * expressions is mostly a superset of that used in other Java language
2155f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * implementations. This means that existing applications will normally work as
2165f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * expected, but in rare cases Android may accept a regular expression that is
2175f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * not accepted by other implementations.
218f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
2195f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>In some cases, Android will recognize that a regular expression is a simple
2205f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * special case that can be handled more efficiently. This is true of both the convenience methods
2215f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * in {@code String} and the methods in {@code Pattern}.
222f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes *
223fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see Matcher
224fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */
225fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectpublic final class Pattern implements Serializable {
226f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes
227fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    private static final long serialVersionUID = 5073258162644648461L;
228f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes
229fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
230fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * This constant specifies that a pattern matches Unix line endings ('\n')
2315f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * only against the '.', '^', and '$' meta characters. Corresponds to {@code (?d)}.
232fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
233fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int UNIX_LINES = 0x01;
234fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
235fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
236fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * This constant specifies that a {@code Pattern} is matched
237fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * case-insensitively. That is, the patterns "a+" and "A+" would both match
2385f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * the string "aAaAaA". See {@link #UNICODE_CASE}. Corresponds to {@code (?i)}.
239fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
240fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int CASE_INSENSITIVE = 0x02;
241fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
242fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
243fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * This constant specifies that a {@code Pattern} may contain whitespace or
244fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * comments. Otherwise comments and whitespace are taken as literal
2455f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * characters. Corresponds to {@code (?x)}.
246fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
247fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int COMMENTS = 0x04;
248fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
249fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
250fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * This constant specifies that the meta characters '^' and '$' match only
2515f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * the beginning and end of an input line, respectively. Normally, they
2525f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * match the beginning and the end of the complete input. Corresponds to {@code (?m)}.
253fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
254fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int MULTILINE = 0x08;
255fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
256fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
257fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * This constant specifies that the whole {@code Pattern} is to be taken
258fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * literally, that is, all meta characters lose their meanings.
259fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
260fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int LITERAL = 0x10;
261fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
262fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
263fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * This constant specifies that the '.' meta character matches arbitrary
264fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * characters, including line endings, which is normally not the case.
2655f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * Corresponds to {@code (?s)}.
266fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
267fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int DOTALL = 0x20;
268fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
269fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
2705f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * This constant specifies that a {@code Pattern} that uses case-insensitive matching
2715f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * will use Unicode case folding. On Android, {@code UNICODE_CASE} is always on:
2725f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * case-insensitive matching will always be Unicode-aware. If your code is intended to
2735f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * be portable and uses case-insensitive matching on non-ASCII characters, you should
2745f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * use this flag. Corresponds to {@code (?u)}.
275fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
276fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    public static final int UNICODE_CASE = 0x40;
277fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
/**
 * This constant specifies that a character in a {@code Pattern} and a
 * character in the input string only match if they are canonically
 * equivalent. It is (currently) not supported in Android: passing it to
 * {@code compile} fails with an {@link UnsupportedOperationException}.
 */
public static final int CANON_EQ = 0x80;
284fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
/** The regular expression exactly as given to {@code compile}; returned by {@link #pattern()}. */
private final String pattern;
/** The flag bits exactly as given to {@code compile}; returned by {@link #flags()}. */
private final int flags;

/**
 * Handle returned by the native {@code compileImpl} call and later passed to
 * {@code closeImpl} by the finalizer. It is {@code transient}: {@code readObject}
 * recompiles the pattern to obtain a fresh value after deserialization.
 * NOTE(review): package-visible, presumably so that {@code Matcher} can read it — confirm.
 */
transient long address;
28977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
/**
 * Returns a {@link Matcher} for this pattern applied to the given {@code input}.
 * The {@code Matcher} can be used to match the {@code Pattern} against the
 * whole input, find occurrences of the {@code Pattern} in the input, or
 * replace parts of the input.
 *
 * @param input the character sequence the new matcher will operate on
 * @return a newly created {@code Matcher} bound to this pattern and {@code input}
 */
public Matcher matcher(CharSequence input) {
    return new Matcher(this, input);
}
29977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
/**
 * Splits the given {@code input} at occurrences of this pattern.
 *
 * <p>If this pattern does not occur in the input, the result is an
 * array containing the input (converted from a {@code CharSequence} to
 * a {@code String}).
 *
 * <p>Otherwise, the {@code limit} parameter controls the contents of the
 * returned array as described below.
 *
 * @param input
 *            The character sequence to split. It is converted with
 *            {@code toString()} before the splitting is done.
 * @param limit
 *            Determines the maximum number of entries in the resulting
 *            array, and the treatment of trailing empty strings.
 *            <ul>
 *            <li>For {@code limit} &gt; 0, the resulting array contains at
 *            most {@code limit} entries. If this is fewer than the number
 *            of matches, the final entry will contain all remaining input.
 *            <li>For {@code limit} &lt; 0, the length of the resulting array
 *            is exactly the number of occurrences of the {@code Pattern}
 *            plus one for the text after the final separator.
 *            All entries are included.
 *            <li>For {@code limit} == 0, the result is as for
 *            {@code limit} &lt; 0, except trailing empty strings will not be
 *            returned. (Note that an input that is itself an empty string
 *            is a special case to which the limit parameter does not
 *            apply.)
 *            </ul>
 * @return the pieces of {@code input} found between occurrences of this
 *         pattern, subject to {@code limit}
 */
public String[] split(CharSequence input, int limit) {
    return Splitter.split(this, pattern, input.toString(), limit);
}
33177d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
/**
 * Equivalent to {@code split(input, 0)}: splits {@code input} at every
 * occurrence of this pattern and does not return trailing empty strings.
 *
 * @param input the character sequence to split
 * @return the pieces of {@code input} found between occurrences of this pattern
 */
public String[] split(CharSequence input) {
    return split(input, 0);
}
33877d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
/**
 * Returns the regular expression supplied to {@code compile}.
 *
 * @return the pattern string exactly as it was passed in; this is not the
 *         quoted form that is compiled when {@link #LITERAL} is set
 */
public String pattern() {
    return pattern;
}
34577d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
/**
 * Returns the same string as {@link #pattern()}: the regular expression
 * supplied to {@code compile}.
 */
@Override
public String toString() {
    return pattern;
}
35077d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
/**
 * Returns the flags supplied to {@code compile}.
 *
 * @return the flag bits exactly as they were passed in, including any that are
 *         not forwarded to the native regex compiler
 */
public int flags() {
    return flags;
}
357fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
/**
 * Returns a compiled form of the given {@code regularExpression}, as modified by the
 * given {@code flags}. See the <a href="#flags">flags overview</a> for more on flags.
 *
 * @param regularExpression the regular expression to compile
 * @param flags a bitwise OR of the flag constants listed below, or 0 for none
 * @return a new {@code Pattern}
 *
 * @throws PatternSyntaxException if the regular expression is syntactically incorrect.
 * @throws UnsupportedOperationException if {@code flags} contains {@link #CANON_EQ}.
 * @throws IllegalArgumentException if {@code flags} contains any bit that is not one of
 *         the supported flag constants.
 * @throws NullPointerException if {@code regularExpression} is {@code null}.
 *
 * @see #CANON_EQ
 * @see #CASE_INSENSITIVE
 * @see #COMMENTS
 * @see #DOTALL
 * @see #LITERAL
 * @see #MULTILINE
 * @see #UNICODE_CASE
 * @see #UNIX_LINES
 */
public static Pattern compile(String regularExpression, int flags) throws PatternSyntaxException {
    return new Pattern(regularExpression, flags);
}
376fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
377fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
3785f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * Equivalent to {@code Pattern.compile(pattern, 0)}.
379fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
3805f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes    public static Pattern compile(String pattern) {
3815f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        return new Pattern(pattern, 0);
3825f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes    }
3835f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes
384fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    private Pattern(String pattern, int flags) throws PatternSyntaxException {
385fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        if ((flags & CANON_EQ) != 0) {
386fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project            throw new UnsupportedOperationException("CANON_EQ flag not supported");
387fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        }
3885700a9718eef20f4228ed97d54a59cc70bf40753Elliott Hughes        int supportedFlags = CASE_INSENSITIVE | COMMENTS | DOTALL | LITERAL | MULTILINE | UNICODE_CASE | UNIX_LINES;
3895700a9718eef20f4228ed97d54a59cc70bf40753Elliott Hughes        if ((flags & ~supportedFlags) != 0) {
3905700a9718eef20f4228ed97d54a59cc70bf40753Elliott Hughes            throw new IllegalArgumentException("Unsupported flags: " + (flags & ~supportedFlags));
3915700a9718eef20f4228ed97d54a59cc70bf40753Elliott Hughes        }
392fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        this.pattern = pattern;
393fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        this.flags = flags;
394e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes        compile();
395fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    }
39677d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson
397e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes    private void compile() throws PatternSyntaxException {
398fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        if (pattern == null) {
399e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes            throw new NullPointerException("pattern == null");
400fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        }
401f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes
402e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes        String icuPattern = pattern;
403fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        if ((flags & LITERAL) != 0) {
404e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes            icuPattern = quote(pattern);
405fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        }
406f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes
407fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        // These are the flags natively supported by ICU.
408fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        // They even have the same value in native code.
409e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes        int icuFlags = flags & (CASE_INSENSITIVE | COMMENTS | MULTILINE | DOTALL | UNIX_LINES);
410f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes
411e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes        address = compileImpl(icuPattern, icuFlags);
412fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    }
413fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
414fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
4155f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * Tests whether the given {@code regularExpression} matches the given {@code input}.
4165f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * Equivalent to {@code Pattern.compile(regularExpression).matcher(input).matches()}.
4175f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * If the same regular expression is to be used for multiple operations, it may be more
4185f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * efficient to reuse a compiled {@code Pattern}.
419f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
420fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * @see Pattern#compile(java.lang.String, int)
421fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     * @see Matcher#matches()
422fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
4235f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes    public static boolean matches(String regularExpression, CharSequence input) {
4245f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        return new Matcher(new Pattern(regularExpression, 0), input).matches();
425fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    }
426fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
427fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    /**
4285f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * Quotes the given {@code string} using "\Q" and "\E", so that all
4295f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * meta-characters lose their special meaning. This method correctly
4305f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * escapes embedded instances of "\Q" or "\E". If the entire result
4315f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * is to be passed verbatim to {@link #compile}, it's usually clearer
4325f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes     * to use the {@link #LITERAL} flag instead.
433fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project     */
4345f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes    public static String quote(String string) {
4355f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        StringBuilder sb = new StringBuilder();
4365f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        sb.append("\\Q");
437fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        int apos = 0;
438fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        int k;
4395f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        while ((k = string.indexOf("\\E", apos)) >= 0) {
4405f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes            sb.append(string.substring(apos, k + 2)).append("\\\\E\\Q");
441fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project            apos = k + 2;
442fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        }
4435f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        return sb.append(string.substring(apos)).append("\\E").toString();
444fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    }
445f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes
446e2f58c9501eac730d048199906dc41fe8e4cd6e9Brian Carlstrom    @Override protected void finalize() throws Throwable {
447dd828f42a5c83b4270d4fbf6fce2da1878f1e84aThe Android Open Source Project        try {
448e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes            closeImpl(address);
4495f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes        } finally {
450dd828f42a5c83b4270d4fbf6fce2da1878f1e84aThe Android Open Source Project            super.finalize();
451fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project        }
452fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project    }
453fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project
/**
 * Restores a serialized {@code Pattern}. The serialized fields are read first, then
 * the pattern is recompiled so that the {@code transient} native handle is recreated.
 * NOTE(review): unlike the constructor, the deserialized flags are not validated here.
 */
private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
    s.defaultReadObject();
    // address is transient and therefore not restored above; rebuild it from pattern and flags.
    compile();
}
458e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes
/**
 * Native counterpart of {@code finalize}; receives the handle previously returned by
 * {@code compileImpl}. NOTE(review): presumably frees the native pattern — confirm
 * against the JNI implementation.
 */
private static native void closeImpl(long addr);
/**
 * Native compile step. Receives the expression (already quoted when {@code LITERAL}
 * is set) and the natively supported subset of the flags, and returns the handle
 * that is stored in {@code address}.
 */
private static native long compileImpl(String regex, int flags);
461fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project}
462