Pattern.java revision e377c5a424823bacf28e8ffb2a2f02bfe46d4a2b
1fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project/* 2fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * Copyright (C) 2007 The Android Open Source Project 3fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * 4fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 5fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * you may not use this file except in compliance with the License. 6fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * You may obtain a copy of the License at 7fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * 8fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 9fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * 10fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * Unless required by applicable law or agreed to in writing, software 11fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 12fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * See the License for the specific language governing permissions and 14fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * limitations under the License. 
15fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 16fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 17fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectpackage java.util.regex; 18fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 195f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughesimport java.io.IOException; 205f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughesimport java.io.ObjectInputStream; 21fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectimport java.io.Serializable; 22fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectimport java.util.ArrayList; 23fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 24fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project/** 255f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Patterns are compiled regular expressions. In many cases, convenience methods such as 265f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * {@link String#matches String.matches}, {@link String#replaceAll String.replaceAll} and 275f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * {@link String#split String.split} will be preferable, but if you need to do a lot of work 285f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * with the same regular expression, it may be more efficient to compile it once and reuse it. 295f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * The {@code Pattern} class and its companion, {@link Matcher}, are also a lot more powerful 305f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * than the small amount of functionality exposed by {@code String}. 
31f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 32fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * <pre> 335f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * // String convenience methods: 345f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * boolean sawFailures = s.matches("Failures: \\d+"); 355f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * String farewell = s.replaceAll("Hello, (\\S+)", "Goodbye, $1"); 365f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * String[] fields = s.split(":"); 37f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 385f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * // Direct use of Pattern: 395f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Pattern p = Pattern.compile("Hello, (\\S+)"); 405f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Matcher m = p.matcher(inputString); 415f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * while (m.find()) { // Find each match in turn; String can't do this. 425f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * String name = m.group(1); // Access a submatch group; String can't do this. 435f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * } 44fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * </pre> 45f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 465f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Regular expression syntax</h3> 475f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <span class="datatable"> 485f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <style type="text/css"> 495f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * .datatable td { padding-right: 20px; } 505f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </style> 51f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 525f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Java supports a subset of Perl 5 regular expression syntax. 
An important gotcha is that Java 535f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * has no regular expression literals, and uses plain old string literals instead. This means that 545f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * you need an extra level of escaping. For example, the regular expression {@code \s+} has to 555f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * be represented as the string {@code "\\s+"}. 56f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 575f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Escape sequences</h3> 585f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table> 595f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \ </td> <td>Quote the following metacharacter (so {@code \.} matches a literal {@code .}).</td> </tr> 605f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \Q </td> <td>Quote all following metacharacters until {@code \E}.</td> </tr> 615f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \E </td> <td>Stop quoting metacharacters (started by {@code \Q}).</td> </tr> 625f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \\ </td> <td>A literal backslash.</td> </tr> 63069774ea75c32440ad39f54c9a77d0eda6b20b53Elliott Hughes * <tr> <td> \u<i>hhhh</i> </td> <td>The Unicode character U+hhhh (in hex).</td> </tr> 64069774ea75c32440ad39f54c9a77d0eda6b20b53Elliott Hughes * <tr> <td> \x<i>hh</i> </td> <td>The Unicode character U+00hh (in hex).</td> </tr> 65069774ea75c32440ad39f54c9a77d0eda6b20b53Elliott Hughes * <tr> <td> \c<i>x</i> </td> <td>The ASCII control character ^x (so {@code \cH} would be ^H, U+0008).</td> </tr> 66f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 675f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \a </td> <td>The ASCII bell character (U+0007).</td> </tr> 685f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \e </td> <td>The ASCII ESC character (U+001b).</td> 
</tr> 695f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \f </td> <td>The ASCII form feed character (U+000c).</td> </tr> 705f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \n </td> <td>The ASCII newline character (U+000a).</td> </tr> 715f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \r </td> <td>The ASCII carriage return character (U+000d).</td> </tr> 725f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \t </td> <td>The ASCII tab character (U+0009).</td> </tr> 735f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 74f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 755f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Character classes</h3> 765f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>It's possible to construct arbitrary character classes using set operations: 775f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table> 785f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [abc] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Enumeration.)</td> </tr> 795f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [a-c] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Range.)</td> </tr> 805f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [^abc] </td> <td>Any character <i>except</i> {@code a}, {@code b}, or {@code c}. (Negation.)</td> </tr> 815f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [[a-f][0-9]] </td> <td>Any character in either range. (Union.)</td> </tr> 825f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> [[a-z]&&[jkl]] </td> <td>Any character in both ranges. 
(Intersection.)</td> </tr> 835f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 845f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Most of the time, the built-in character classes are more useful: 855f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table> 865f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \d </td> <td>Any digit character.</td> </tr> 875f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \D </td> <td>Any non-digit character.</td> </tr> 885f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \s </td> <td>Any whitespace character.</td> </tr> 895f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \S </td> <td>Any non-whitespace character.</td> </tr> 905f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \w </td> <td>Any word character.</td> </tr> 915f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \W </td> <td>Any non-word character.</td> </tr> 925f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \p{<i>NAME</i>} </td> <td> Any character in the class with the given <i>NAME</i>. </td> </tr> 935f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \P{<i>NAME</i>} </td> <td> Any character <i>not</i> in the named class. </td> </tr> 945f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 955f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>There are a variety of named classes: 965f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <ul> 975f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li><a href="../../lang/Character.html#unicode_categories">Unicode category names</a>, 985f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * prefixed by {@code Is}. For example {@code \p{IsLu}} for all uppercase letters. 995f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li>POSIX class names. 
These are 'Alnum', 'Alpha', 'ASCII', 'Blank', 'Cntrl', 'Digit', 1005f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * 'Graph', 'Lower', 'Print', 'Punct', 'Upper', 'XDigit'. 1015f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li>Unicode block names, as used by {@link java.lang.Character.UnicodeBlock#forName} prefixed 1025f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * by {@code In}. For example {@code \p{InHebrew}} for all characters in the Hebrew block. 1035f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <li>Character method names. These are all non-deprecated methods from {@link java.lang.Character} 1045f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * whose name starts with {@code is}, but with the {@code is} replaced by {@code java}. 1055f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * For example, {@code \p{javaLowerCase}}. 1065f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </ul> 107f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1085f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Quantifiers</h3> 1095f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Quantifiers match some number of instances of the preceding regular expression. 1105f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table> 1115f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> * </td> <td>Zero or more.</td> </tr> 1125f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ? 
</td> <td>Zero or one.</td> </tr> 1135f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> + </td> <td>One or more.</td> </tr> 1145f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>} </td> <td>Exactly <i>n</i>.</td> </tr> 1155f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n,</i>} </td> <td>At least <i>n</i>.</td> </tr> 1165f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>,<i>m</i>} </td> <td>At least <i>n</i> but not more than <i>m</i>.</td> </tr> 1175f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 1185f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Quantifiers are "greedy" by default, meaning that they will match the longest possible input 1195f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * sequence. There are also non-greedy quantifiers that match the shortest possible input sequence. 1205f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * They're the same as the greedy ones but with a trailing {@code ?}: 1215f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table> 1225f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> *? </td> <td>Zero or more (non-greedy).</td> </tr> 1235f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ?? </td> <td>Zero or one (non-greedy).</td> </tr> 1245f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> +? </td> <td>One or more (non-greedy).</td> </tr> 1255f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>}? </td> <td>Exactly <i>n</i> (non-greedy).</td> </tr> 1265f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n,</i>}? </td> <td>At least <i>n</i> (non-greedy).</td> </tr> 1275f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>,<i>m</i>}? 
</td> <td>At least <i>n</i> but not more than <i>m</i> (non-greedy).</td> </tr> 1285f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 1295f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Quantifiers allow backtracking by default. There are also possessive quantifiers to prevent 1305f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * backtracking. They're the same as the greedy ones but with a trailing {@code +}: 1315f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table> 1325f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> *+ </td> <td>Zero or more (possessive).</td> </tr> 1335f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ?+ </td> <td>Zero or one (possessive).</td> </tr> 1345f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ++ </td> <td>One or more (possessive).</td> </tr> 1355f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>}+ </td> <td>Exactly <i>n</i> (possessive).</td> </tr> 1365f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n,</i>}+ </td> <td>At least <i>n</i> (possessive).</td> </tr> 1375f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> {<i>n</i>,<i>m</i>}+ </td> <td>At least <i>n</i> but not more than <i>m</i> (possessive).</td> </tr> 1385f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 139f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1405f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Zero-width assertions</h3> 1415f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table> 1425f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> ^ </td> <td>At beginning of line.</td> </tr> 1435f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> $ </td> <td>At end of line.</td> </tr> 1445f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \A </td> <td>At beginning of input.</td> </tr> 1455f37da05bb48298568f8abd7c97c3d11552e1867Elliott 
Hughes * <tr> <td> \b </td> <td>At word boundary.</td> </tr> 1465f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \B </td> <td>At non-word boundary.</td> </tr> 1475f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \G </td> <td>At end of previous match.</td> </tr> 1485f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \z </td> <td>At end of input.</td> </tr> 1495f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \Z </td> <td>At end of input, or before newline at end.</td> </tr> 1505f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 151f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1525f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Look-around assertions</h3> 1535f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Look-around assertions assert that the subpattern does (positive) or doesn't (negative) match 1545f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * after (look-ahead) or before (look-behind) the current position, without including the matched 1555f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * text in the containing match. The maximum length of possible matches for look-behind patterns 1565f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * must not be unbounded. 
1575f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table> 1585f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?=<i>a</i>) </td> <td>Zero-width positive look-ahead.</td> </tr> 1595f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?!<i>a</i>) </td> <td>Zero-width negative look-ahead.</td> </tr> 1605f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?<=<i>a</i>) </td> <td>Zero-width positive look-behind.</td> </tr> 1615f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?<!<i>a</i>) </td> <td>Zero-width negative look-behind.</td> </tr> 1625f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 163f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1645f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Groups</h3> 165f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1665f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table> 1675f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (<i>a</i>) </td> <td>A capturing group.</td> </tr> 1685f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?:<i>a</i>) </td> <td>A non-capturing group.</td> </tr> 1695f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?><i>a</i>) </td> <td>An independent non-capturing group. (The first match of the subgroup is the only match tried.)</td> </tr> 1705f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> \<i>n</i> </td> <td>The text already matched by capturing group <i>n</i>.</td> </tr> 1715f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 1725f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Explicit capturing groups are numbered from 1, and available via {@link Matcher#group}. 1735f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Group 0 represents the whole match. 
174f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1755f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Operators</h3> 1765f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table> 1775f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> <i>ab</i> </td> <td>Expression <i>a</i> followed by expression <i>b</i>.</td> </tr> 1785f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> <i>a</i>|<i>b</i> </td> <td>Either expression <i>a</i> or expression <i>b</i>.</td> </tr> 1795f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 180f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1815f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <a name="flags"><h3>Flags</h3></a> 1825f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p><table> 1835f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?dimsux-dimsux:<i>a</i>) </td> <td>Evaluates the expression <i>a</i> with the given flags enabled/disabled.</td> </tr> 1845f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr> <td> (?dimsux-dimsux) </td> <td>Evaluates the rest of the pattern with the given flags enabled/disabled.</td> </tr> 1855f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 186f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 1875f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>The flags are: 1885f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <table> 1895f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code i}</td> <td>{@link #CASE_INSENSITIVE}</td> <td>case insensitive matching</td></tr> 1905f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code d}</td> <td>{@link #UNIX_LINES}</td> <td>only accept {@code '\n'} as a line terminator</td></tr> 1915f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code m}</td> <td>{@link #MULTILINE}</td> <td>allow {@code ^} and {@code $} to match beginning/end of any line</td></tr> 
1925f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code s}</td> <td>{@link #DOTALL}</td> <td>allow {@code .} to match {@code '\n'} ("s" for "single line")</td></tr> 1935f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code u}</td> <td>{@link #UNICODE_CASE}</td> <td>enable Unicode case folding</td></tr> 1945f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <tr><td>{@code x}</td> <td>{@link #COMMENTS}</td> <td>allow whitespace and comments</td></tr> 1955f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </table> 1965f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Either set of flags may be empty. For example, {@code (?i-m)} would turn on case-insensitivity 1975f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * and turn off multiline mode, {@code (?i)} would just turn on case-insensitivity, 1985f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * and {@code (?-m)} would just turn off multiline mode. 1995f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>Note that on Android, {@code UNICODE_CASE} is always on: case-insensitive matching will 2005f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * always be Unicode-aware. 2015f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>There are two other flags not settable via this mechanism: {@link #CANON_EQ} and 2025f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * {@link #LITERAL}. Attempts to use {@link #CANON_EQ} on Android will throw an exception. 
2035f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * </span> 204f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 2055f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <h3>Implementation notes</h3> 206f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 2075f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * The regular expression implementation used in Android is provided by 2085f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <a href="http://www.icu-project.org">ICU</a>. The notation for the regular 2095f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * expressions is mostly a superset of those used in other Java language 2105f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * implementations. This means that existing applications will normally work as 2115f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * expected, but in rare cases Android may accept a regular expression that is 2125f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * not accepted by other implementations. 213f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 2145f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * <p>In some cases, Android will recognize that a regular expression is a simple 2155f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * special case that can be handled more efficiently. This is true of both the convenience methods 2165f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * in {@code String} and the methods in {@code Pattern}. 
217f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 218fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see Matcher 219fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 220fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Projectpublic final class Pattern implements Serializable { 221f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes 222fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project private static final long serialVersionUID = 5073258162644648461L; 223f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes 224fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 225fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that a pattern matches Unix line endings ('\n') 2265f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * only against the '.', '^', and '$' meta characters. Corresponds to {@code (?d)}. 227fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 228fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int UNIX_LINES = 0x01; 229fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 230fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 231fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that a {@code Pattern} is matched 232fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * case-insensitively. That is, the patterns "a+" and "A+" would both match 2335f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * the string "aAaAaA". See {@link #UNICODE_CASE}. Corresponds to {@code (?i)}. 
234fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 235fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int CASE_INSENSITIVE = 0x02; 236fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 237fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 238fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that a {@code Pattern} may contain whitespace or 239fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * comments. Otherwise comments and whitespace are taken as literal 2405f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * characters. Corresponds to {@code (?x)}. 241fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 242fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int COMMENTS = 0x04; 243fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 244fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 245fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that the meta characters '^' and '$' match only 2465f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * the beginning and end of an input line, respectively. Normally, they 2475f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * match the beginning and the end of the complete input. Corresponds to {@code (?m)}. 
248fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 249fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int MULTILINE = 0x08; 250fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 251fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 252fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that the whole {@code Pattern} is to be taken 253fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * literally, that is, all meta characters lose their meanings. 254fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 255fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int LITERAL = 0x10; 256fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 257fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 258fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that the '.' meta character matches arbitrary 259fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * characters, including line endings, which is normally not the case. 2605f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Corresponds to {@code (?s)}. 261fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 262fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int DOTALL = 0x20; 263fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 264fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 2655f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * This constant specifies that a {@code Pattern} that uses case-insensitive matching 2665f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * will use Unicode case folding. 
On Android, {@code UNICODE_CASE} is always on: 2675f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * case-insensitive matching will always be Unicode-aware. If your code is intended to 2685f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * be portable and uses case-insensitive matching on non-ASCII characters, you should 2695f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * use this flag. Corresponds to {@code (?u)}. 270fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 271fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int UNICODE_CASE = 0x40; 272fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 273fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 274fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * This constant specifies that a character in a {@code Pattern} and a 275fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * character in the input string only match if they are canonically 276fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * equivalent. It is (currently) not supported in Android. 
277fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 278fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project public static final int CANON_EQ = 0x80; 279fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 2805f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes private final String pattern; 2815f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes private final int flags; 282fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 283e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes transient int address; 28477d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 285fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 2865f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Returns a {@link Matcher} for this pattern applied to the given {@code input}. 2875f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * The {@code Matcher} can be used to match the {@code Pattern} against the 28877d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * whole input, find occurrences of the {@code Pattern} in the input, or 28977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * replace parts of the input. 290fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 29177d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson public Matcher matcher(CharSequence input) { 29277d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson return new Matcher(this, input); 29377d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson } 29477d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 29577d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson /** 2965f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Splits the given {@code input} at occurrences of this pattern. 
*
     * <p>If this pattern does not occur in the input, the result is an
     * array containing the input (converted from a {@code CharSequence} to
     * a {@code String}).
     *
     * <p>Otherwise, the {@code limit} parameter controls the contents of the
     * returned array as described below.
     *
     * @param limit
     *            Determines the maximum number of entries in the resulting
     *            array, and the treatment of trailing empty strings.
     *            <ul>
     *            <li>For {@code n > 0}, the resulting array contains at most n
     *            entries. If this is fewer than the number of matches, the
     *            final entry will contain all remaining input.
     *            <li>For {@code n < 0}, the length of the resulting array is
     *            exactly the number of occurrences of the {@code Pattern}
     *            plus one for the text after the final separator.
     *            All entries are included.
31677d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * <li>For n == 0, the result is as for n < 0, except 31777d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * trailing empty strings will not be returned. (Note that 31877d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * the case where the input is itself an empty string is 31977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * special, as described above, and the limit parameter does 32077d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * not apply there.) 32177d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson * </ul> 32277d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson */ 3230510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes public String[] split(CharSequence input, int limit) { 3240510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return Splitter.split(this, pattern, input.toString(), limit); 32577d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson } 32677d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 32777d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson /** 3285f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Equivalent to {@code split(input, 0)}. 32977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson */ 33077d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson public String[] split(CharSequence input) { 33177d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson return split(input, 0); 33277d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson } 33377d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 33477d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson /** 3355f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Returns the regular expression supplied to {@code compile}. 
33677d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson */ 33777d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson public String pattern() { 33877d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson return pattern; 33977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson } 34077d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 34177d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson @Override 34277d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson public String toString() { 34377d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson return pattern; 34477d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson } 34577d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 34677d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson /** 3475f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Returns the flags supplied to {@code compile}. 34877d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson */ 34977d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson public int flags() { 35077d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson return flags; 351fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 352fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 353fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 3545f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Returns a compiled form of the given {@code regularExpression}, as modified by the 3555f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * given {@code flags}. See the <a href="#flags">flags overview</a> for more on flags. 356f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 3575f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * @throws PatternSyntaxException if the regular expression is syntactically incorrect. 
358f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 359fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #CANON_EQ 360fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #CASE_INSENSITIVE 361fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #COMMENTS 362fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #DOTALL 363fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #LITERAL 364fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #MULTILINE 365fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #UNICODE_CASE 366fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see #UNIX_LINES 367fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 3685f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes public static Pattern compile(String regularExpression, int flags) throws PatternSyntaxException { 3695f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes return new Pattern(regularExpression, flags); 370fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 371fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 372fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 3735f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Equivalent to {@code Pattern.compile(pattern, 0)}. 
374fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 3755f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes public static Pattern compile(String pattern) { 3765f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes return new Pattern(pattern, 0); 3775f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes } 3785f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes 379fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project private Pattern(String pattern, int flags) throws PatternSyntaxException { 380fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project if ((flags & CANON_EQ) != 0) { 381fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project throw new UnsupportedOperationException("CANON_EQ flag not supported"); 382fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 383fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project this.pattern = pattern; 384fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project this.flags = flags; 385e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes compile(); 386fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 38777d58e2a1577a4992f4d81e9ca2807f7533725c6Jesse Wilson 388e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes private void compile() throws PatternSyntaxException { 389fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project if (pattern == null) { 390e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes throw new NullPointerException("pattern == null"); 391fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 392f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes 393e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes String icuPattern = pattern; 394fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project if ((flags & LITERAL) != 0) { 395e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes icuPattern = quote(pattern); 
396fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 397f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes 398fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project // These are the flags natively supported by ICU. 399fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project // They even have the same value in native code. 400e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes int icuFlags = flags & (CASE_INSENSITIVE | COMMENTS | MULTILINE | DOTALL | UNIX_LINES); 401f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes 402e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes address = compileImpl(icuPattern, icuFlags); 403fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 404fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 405fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 4065f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Tests whether the given {@code regularExpression} matches the given {@code input}. 4075f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Equivalent to {@code Pattern.compile(regularExpression).matcher(input).matches()}. 4085f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * If the same regular expression is to be used for multiple operations, it may be more 4095f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * efficient to reuse a compiled {@code Pattern}. 
410f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 411fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see Pattern#compile(java.lang.String, int) 412fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project * @see Matcher#matches() 413fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 4145f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes public static boolean matches(String regularExpression, CharSequence input) { 4155f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes return new Matcher(new Pattern(regularExpression, 0), input).matches(); 416fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 417fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 418fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project /** 4195f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * Quotes the given {@code string} using "\Q" and "\E", so that all 4205f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * meta-characters lose their special meaning. This method correctly 4215f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * escapes embedded instances of "\Q" or "\E". If the entire result 4225f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * is to be passed verbatim to {@link #compile}, it's usually clearer 4235f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes * to use the {@link #LITERAL} flag instead. 
424fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project */ 4255f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes public static String quote(String string) { 4265f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes StringBuilder sb = new StringBuilder(); 4275f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes sb.append("\\Q"); 428fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project int apos = 0; 429fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project int k; 4305f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes while ((k = string.indexOf("\\E", apos)) >= 0) { 4315f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes sb.append(string.substring(apos, k + 2)).append("\\\\E\\Q"); 432fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project apos = k + 2; 433fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 4345f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes return sb.append(string.substring(apos)).append("\\E").toString(); 435fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 436f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes 437fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project @Override 438dd828f42a5c83b4270d4fbf6fce2da1878f1e84aThe Android Open Source Project protected void finalize() throws Throwable { 439dd828f42a5c83b4270d4fbf6fce2da1878f1e84aThe Android Open Source Project try { 440e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes closeImpl(address); 4415f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes } finally { 442dd828f42a5c83b4270d4fbf6fce2da1878f1e84aThe Android Open Source Project super.finalize(); 443fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 444fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 445fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project 4465f37da05bb48298568f8abd7c97c3d11552e1867Elliott Hughes private void 
readObject(ObjectInputStream s) throws IOException, ClassNotFoundException { 447fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project s.defaultReadObject(); 448e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes compile(); 449fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project } 450e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes 451e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes private static native void closeImpl(int addr); 452e377c5a424823bacf28e8ffb2a2f02bfe46d4a2bElliott Hughes private static native int compileImpl(String regex, int flags); 453fdb2704414a9ed92394ada0d1395e4db8688946The Android Open Source Project} 454