12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 6bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Copyright (C) 1996-2016, International Business Machines Corporation and 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved. 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text; 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParsePosition; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList; 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Collection; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Collections; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.NoSuchElementException; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.TreeSet; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.BMPSet; 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Norm2AllModes; 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fullerimport android.icu.impl.PatternProps; 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.RuleCharacterIterator; 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.SortedSetRelation; 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.StringRange; 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UBiDiProps; 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UCaseProps; 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UCharacterProperty; 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UPropertyAliases; 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UnicodeSetStringSpan; 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Utility; 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.CharSequences; 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter; 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UProperty; 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UScript; 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.Freezable; 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException; 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.OutputInt; 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale; 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.VersionInfo; 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A mutable set of Unicode characters and multicharacter strings. 
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Objects of this class represent <em>character classes</em> used 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in regular expressions. A character class specifies a subset of Unicode 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * code points. Legal code points are U+0000 to U+10FFFF, inclusive. 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note: method freeze() will not only make the set immutable, but 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * also makes important methods much higher performance: 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains(c), containsNone(...), span(...), spanBack(...) etc. 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * After the object is frozen, any subsequent call that wants to change 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the object will throw UnsupportedOperationException. 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The UnicodeSet class is not designed to be subclassed. 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><code>UnicodeSet</code> supports two APIs. The first is the 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <em>operand</em> API that allows the caller to modify the value of 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a <code>UnicodeSet</code> object. It conforms to Java 2's 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>java.util.Set</code> interface, although 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>UnicodeSet</code> does not actually implement that 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * interface. 
All methods of <code>Set</code> are supported, with the 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * modification that they take a character range or single character 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * instead of an <code>Object</code>, and they take a 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>UnicodeSet</code> instead of a <code>Collection</code>. The 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * operand API may be thought of in terms of boolean logic: a boolean 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * OR is implemented by <code>add</code>, a boolean AND is implemented 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * by <code>retain</code>, a boolean XOR is implemented by 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>complement</code> taking an argument, and a boolean NOT is 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * implemented by <code>complement</code> with no argument. In terms 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of traditional set theory function names, <code>add</code> is a 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * union, <code>retain</code> is an intersection, <code>remove</code> 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is an asymmetric difference, and <code>complement</code> with no 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * argument is a set complement with respect to the superset range 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>MIN_VALUE-MAX_VALUE</code> 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The second API is the 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>applyPattern()</code>/<code>toPattern()</code> API from the 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>java.text.Format</code>-derived classes. 
Unlike the 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * methods that add characters, add categories, and control the logic 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the set, the method <code>applyPattern()</code> sets all 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * attributes of a <code>UnicodeSet</code> at once, based on a 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * string pattern. 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Pattern syntax</b></p> 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Patterns are accepted by the constructors and the 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>applyPattern()</code> methods and returned by the 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>toPattern()</code> method. These patterns follow a syntax 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * similar to that employed by version 8 regular expression character 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * classes. 
Here are some simple examples: 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <blockquote> 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <table> 95bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 96bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[]</code></td> 97bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">No characters</td> 98bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * </tr><tr style="vertical-align: top"> 99bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[a]</code></td> 100bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">The character 'a'</td> 101bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * </tr><tr style="vertical-align: top"> 102bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[ae]</code></td> 103bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">The characters 'a' and 'e'</td> 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 106bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[a-e]</code></td> 107bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">The characters 'a' through 'e' inclusive, in Unicode code 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * point order</td> 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 111bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * 
<td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[\\u4E01]</code></td> 112bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">The character U+4E01</td> 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 115bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[a{ab}{ac}]</code></td> 116bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">The character 'a' and the multicharacter strings "ab" and 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "ac"</td> 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 120bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[\p{Lu}]</code></td> 121bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">All characters in the general category Uppercase Letter</td> 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </table> 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </blockquote> 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Any character may be preceded by a backslash in order to remove any special 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * meaning. White space characters, as defined by the Unicode Pattern_White_Space property, are 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ignored, unless they are escaped. 
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Property patterns specify a set of characters having a certain 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * property as defined by the Unicode standard. Both the POSIX-like 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized. For a 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * complete list of supported property patterns, see the User's Guide 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for UnicodeSet at 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <a href="http://www.icu-project.org/userguide/unicodeSet.html"> 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * http://www.icu-project.org/userguide/unicodeSet.html</a>. 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Actual determination of property data is defined by the underlying 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode database as implemented by UCharacter. 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Patterns specify individual characters, ranges of characters, and 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode property sets. When elements are concatenated, they 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specify their union. To complement a set, place a '^' immediately 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * after the opening '['. Property patterns are inverted by modifying 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * their delimiters; "[:^foo:]" and "\P{foo}". In any other location, 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * '^' has no special meaning. 
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Ranges are indicated by placing a '-' between two 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters, as in "a-z". This specifies the range of all 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters from the left to the right, in Unicode order. If the 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * left character is greater than or equal to the 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * right character it is a syntax error. If a '-' occurs as the first 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character after the opening '[' or '[^', or if it occurs as the 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * last character before the closing ']', then it is taken as a 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * literal. Thus "[a\\-b]", "[-ab]", and "[ab-]" all indicate the same 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set of three characters, 'a', 'b', and '-'. 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 157bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <p>Sets may be intersected using the '&' operator or the asymmetric 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set difference may be taken using the '-' operator, for example, 159bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters 160bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * with values less than 4096. Operators ('&' and '-') have equal 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * precedence and bind left-to-right. Thus 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". 
This only really matters for 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * difference; intersection is commutative. 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <table> 167bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[a]</code><td>The set containing 'a' 168bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[a-z]</code><td>The set containing 'a' 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * through 'z' and all letters in between, in Unicode order 170bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[^a-z]</code><td>The set containing 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * all characters but 'a' through 'z', 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF 173bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[[<em>pat1</em>][<em>pat2</em>]]</code> 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em> 175bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code> 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em> 177bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code> 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The asymmetric difference of sets specified by <em>pat1</em> and 
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <em>pat2</em> 180bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[:Lu:] or \p{Lu}</code> 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The set of characters having the specified 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode property; in 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this case, Unicode uppercase letters 184bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[:^Lu:] or \P{Lu}</code> 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The set of characters <em>not</em> having the given 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode property 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </table> 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p> 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Formal syntax</b></p> 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <blockquote> 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <table> 195bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 196bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>pattern := </code></td> 197bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><code>('[' '^'? 
item* ']') | 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * property</code></td> 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 200bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 201bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>item := </code></td> 202bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><code>char | (char '-' char) | pattern-expr<br> 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </code></td> 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 205bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 206bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>pattern-expr := </code></td> 207bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><code>pattern | pattern-expr pattern | 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern-expr op pattern<br> 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </code></td> 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 211bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 212bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>op := </code></td> 213bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><code>'&' | '-'<br> 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </code></td> 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 216bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 217bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>special := </code></td> 
218bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><code>'[' | ']' | '-'<br> 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </code></td> 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 221bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 222bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>char := </code></td> 223bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><em>any character that is not</em><code> special<br> 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * | ('\\' </code><em>any character</em><code>)<br> 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * | ('\u' hex hex hex hex)<br> 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </code></td> 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 228bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top"> 229bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>hex := </code></td> 230bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><em>any character for which 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </em><code>Character.digit(c, 16)</code><em> 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returns a non-negative result</em></td> 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 235bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;" align="right"><code>property := </code></td> 236bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><em>a Unicode property set pattern</em></td> 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </table> 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <br> 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <table border="1"> 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>Legend: <table> 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 244bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;"><code>a := b</code></td> 245bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="width: 20; vertical-align: top;"> </td> 246bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"><code>a</code> may be replaced by <code>b</code> </td> 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 249bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;"><code>a?</code></td> 250bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"></td> 251bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">zero or one instance of <code>a</code><br> 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </td> 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 255bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;"><code>a*</code></td> 256bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"></td> 257bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">zero or more instances of <code>a</code><br> 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </td> 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 261bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;"><code>a | b</code></td> 262bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"></td> 263bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">either <code>a</code> or <code>b</code><br> 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </td> 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tr> 267bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="white-space: nowrap; vertical-align: top;"><code>'a'</code></td> 268bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;"></td> 269bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <td style="vertical-align: top;">the literal string between the quotes </td> 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </table> 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </td> 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </tr> 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </table> 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </blockquote> 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To iterate over contents of UnicodeSet, the following are available: 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <ul><li>{@link #ranges()} to iterate through the ranges</li> 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>{@link #strings()} to iterate through the strings</li> 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>{@link #iterator()} to iterate through the entire contents in a single loop. 
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * That method is, however, not particularly efficient, since it "boxes" each code point into a String. 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </ul> 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All of the above can be used in <b>for</b> loops. 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The {@link android.icu.text.UnicodeSetIterator UnicodeSetIterator} can also be used, but not in <b>for</b> loops. 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @author Alan Liu 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see UnicodeSetIterator 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see UnicodeSetSpanner 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic class UnicodeSet extends UnicodeFilter implements Iterable<String>, Comparable<UnicodeSet>, Freezable<UnicodeSet> { 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constant for the empty set. 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final UnicodeSet EMPTY = new UnicodeSet().freeze(); 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constant for the set of all code points. (Since UnicodeSets can include strings, does not include everything that a UnicodeSet can.) 
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final UnicodeSet ALL_CODE_POINTS = new UnicodeSet(0, 0x10FFFF).freeze(); 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static XSymbolTable XSYMBOL_TABLE = null; // for overriding the function processing 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units. 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 110000 for codepoints 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Minimum value that can be stored in a UnicodeSet. 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MIN_VALUE = LOW; 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Maximum value that can be stored in a UnicodeSet. 
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MAX_VALUE = HIGH - 1; 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int len; // length used; list may be longer to minimize reallocs 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] list; // MUST be terminated with HIGH 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] rangeList; // internal buffer 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] buffer; // internal buffer 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // NOTE: normally the field should be of type SortedSet; but that is missing a public clone!! 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is not private so that UnicodeSetIterator can get access 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller TreeSet<String> strings = new TreeSet<String>(); 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The pattern representation of this set. This may not be the 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * most economical pattern. It is the pattern supplied to 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * applyPattern(), with variables substituted and whitespace 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * removed. For sets constructed without applyPattern(), or 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * modified using the non-pattern API, this string will be null, 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * indicating that toPattern() must generate a pattern 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * representation from the inversion list. 
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String pat = null; 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int START_EXTRA = 16; // initial storage. Must be >= 0 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int GROW_EXTRA = START_EXTRA; // extra amount for growth. Must be >= 0 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Special property set IDs 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String ANY_ID = "ANY"; // [\u0000-\U0010FFFF] 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String ASCII_ID = "ASCII"; // [\u0000-\u007F] 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String ASSIGNED = "Assigned"; // [:^Cn:] 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A set of all characters _except_ the second through last characters of 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * certain ranges. These ranges are ranges of characters whose 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * properties are all exactly alike, e.g. CJK Ideographs from 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * U+4E00 to U+9FA5. 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static UnicodeSet INCLUSIONS[] = null; 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private volatile BMPSet bmpSet; // The set is frozen if bmpSet or stringSpan is not null. 
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private volatile UnicodeSetStringSpan stringSpan; 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Public API 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs an empty set. 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet() { 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = new int[1 + START_EXTRA]; 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[len++] = HIGH; 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a copy of an existing set. 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(UnicodeSet other) { 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set(other); 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 375bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Constructs a set containing the given range. If <code>end > 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * start</code> then an empty set is created. 
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of range 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of range 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(int start, int end) { 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller complement(start, end); 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 387bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Quickly constructs a set from a set of ranges <s0, e0, s1, e1, s2, e2, ..., sn, en>. 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * There must be an even number of integers, and they must be all greater than zero, 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * all less than or equal to Character.MAX_CODE_POINT. 390bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * In each pair (..., si, ei, ...) it must be true that si <= ei 391bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Between adjacent pairs (...ei, sj...), it must be true that ei+1 < sj 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pairs pairs of character representing ranges 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(int... 
pairs) { 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((pairs.length & 1) != 0) { 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Must have even number of integers"); 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = new int[pairs.length + 1]; // don't allocate extra space, because it is likely that this is a fixed set. 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = list.length; 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int last = -1; // used to ensure that the results are monotonically increasing. 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0; 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (i < pairs.length) { 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // start of pair 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = pairs[i]; 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (last >= start) { 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Must be monotonically increasing."); 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i++] = last = start; 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // end of pair 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = pairs[i] + 1; 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (last >= end) { 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Must be monotonically increasing."); 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i++] = last = end; 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i] = HIGH; // terminate 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a set from the given pattern. See the class description 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the syntax of the pattern language. Whitespace is ignored. 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern contains 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a syntax error. 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(String pattern) { 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPattern(pattern, null, null, IGNORE_SPACE); 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a set from the given pattern. See the class description 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the syntax of the pattern language. 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ignoreWhitespace if true, ignore Unicode Pattern_White_Space characters 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern contains 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a syntax error. 
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(String pattern, boolean ignoreWhitespace) { 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0); 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a set from the given pattern. See the class description 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the syntax of the pattern language. 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param options a bitmask indicating which options to apply. 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Valid options are IGNORE_SPACE and CASE. 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern contains 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a syntax error. 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(String pattern, int options) { 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPattern(pattern, null, null, options); 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a set from the given pattern. 
See the class description 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the syntax of the pattern language. 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos on input, the position in pattern at which to start parsing. 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * On output, the position after the last character parsed. 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param symbols a symbol table mapping variables to char[] arrays 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and chars to UnicodeSets 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains a syntax error. 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols) { 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPattern(pattern, pos, symbols, IGNORE_SPACE); 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructs a set from the given pattern. See the class description 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for the syntax of the pattern language. 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos on input, the position in pattern at which to start parsing. 
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * On output, the position after the last character parsed. 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param symbols a symbol table mapping variables to char[] arrays 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and chars to UnicodeSets 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param options a bitmask indicating which options to apply. 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Valid options are IGNORE_SPACE and CASE. 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains a syntax error. 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols, int options) { 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPattern(pattern, pos, symbols, options); 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return a new set that is equivalent to this one. 
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 496f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Object clone() { 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isFrozen()) { 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet result = new UnicodeSet(this); 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.bmpSet = this.bmpSet; 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.stringSpan = this.stringSpan; 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Make this object represent the range <code>start - end</code>. 509bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * If <code>end > start</code> then this object is set to an 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * an empty range. 
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character in the set, inclusive 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character in the set, inclusive 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet set(int start, int end) { 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller clear(); 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller complement(start, end); 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Make this object represent the same set as <code>other</code>. 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param other a <code>UnicodeSet</code> whose value will be 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * copied to this object 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet set(UnicodeSet other) { 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = other.list.clone(); 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = other.len; 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = other.pat; 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings = new TreeSet<String>(other.strings); 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Modifies this set to represent the set specified by the given pattern. 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See the class description for the syntax of the pattern language. 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Whitespace is ignored. 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains a syntax error. 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet applyPattern(String pattern) { 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return applyPattern(pattern, null, null, IGNORE_SPACE); 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Modifies this set to represent the set specified by the given pattern, 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * optionally ignoring whitespace. 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See the class description for the syntax of the pattern language. 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ignoreWhitespace if true then Unicode Pattern_White_Space characters are ignored 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains a syntax error. 
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) { 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0); 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Modifies this set to represent the set specified by the given pattern, 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * optionally ignoring whitespace. 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See the class description for the syntax of the pattern language. 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern a string specifying what characters are in the set 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param options a bitmask indicating which options to apply. 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Valid options are IGNORE_SPACE and CASE. 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the pattern 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains a syntax error. 
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet applyPattern(String pattern, int options) { 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return applyPattern(pattern, null, null, options); 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return true if the given position, in the given pattern, appears 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to be the start of a UnicodeSet pattern. 58139fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean resemblesPattern(String pattern, int pos) { 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((pos+1) < pattern.length() && 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pattern.charAt(pos) == '[') || 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller resemblesPropertyPattern(pattern, pos); 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * TODO: create Appendable version of UTF16.append(buf, c), 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * maybe in new class Appendables? 
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @throws IOException 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static void appendCodePoint(Appendable app, int c) { 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert 0 <= c && c <= 0x10ffff; 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c <= 0xffff) { 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append((char) c); 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append(UTF16.getLeadSurrogate(c)).append(UTF16.getTrailSurrogate(c)); 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * TODO: create class Appendables? 
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @throws IOException 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static void append(Appendable app, CharSequence s) { 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append(s); 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append the <code>toPattern()</code> representation of a 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * string to the given <code>Appendable</code>. 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static <T extends Appendable> T _appendToPat(T buf, String s, boolean escapeUnprintable) { 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp; 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cp = s.codePointAt(i); 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(buf, cp, escapeUnprintable); 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf; 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append the <code>toPattern()</code> representation of a 
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character to the given <code>Appendable</code>. 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static <T extends Appendable> T _appendToPat(T buf, int c, boolean escapeUnprintable) { 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (escapeUnprintable && Utility.isUnprintable(c)) { 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Use hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for anything 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // unprintable 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (Utility.escapeUnprintable(buf, c)) { 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf; 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Okay to let ':' pass through 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (c) { 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '[': // SET_OPEN: 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case ']': // SET_CLOSE: 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '-': // HYPHEN: 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '^': // COMPLEMENT: 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '&': // INTERSECTION: 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '\\': //BACKSLASH: 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '{': 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '}': 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '$': 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case ':': 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append('\\'); 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Escape whitespace 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (PatternProps.isWhiteSpace(c)) { 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append('\\'); 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCodePoint(buf, c); 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf; 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns a string representation of this set. If the result of 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * calling this function is passed to a UnicodeSet constructor, it 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * will produce another set that is equal to this one. 
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 678f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String toPattern(boolean escapeUnprintable) { 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pat != null && !escapeUnprintable) { 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return pat; 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder result = new StringBuilder(); 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return _toPattern(result, escapeUnprintable).toString(); 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append a string representation of this set to result. This will be 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a cleaned version of the string passed to applyPattern(), if there 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is one. Otherwise it will be generated. 
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private <T extends Appendable> T _toPattern(T result, 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean escapeUnprintable) { 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pat == null) { 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return appendNewPattern(result, escapeUnprintable, true); 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!escapeUnprintable) { 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(pat); 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean oddNumberOfBackslashes = false; 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<pat.length(); ) { 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = pat.codePointAt(i); 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i += Character.charCount(c); 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (Utility.isUnprintable(c)) { 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the unprintable character is preceded by an odd 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // number of backslashes, then it has been escaped 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and we omit the last backslash. 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Utility.escapeUnprintable(result, c); 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oddNumberOfBackslashes = false; 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (!oddNumberOfBackslashes && c == '\\') { 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Temporarily withhold an odd-numbered backslash. 
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oddNumberOfBackslashes = true; 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (oddNumberOfBackslashes) { 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('\\'); 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCodePoint(result, c); 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller oddNumberOfBackslashes = false; 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (oddNumberOfBackslashes) { 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('\\'); 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Generate and append a string representation of this set to result. 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This does not use this.pat, the cleaned up copy of the string 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * passed to applyPattern(). 
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param result the buffer into which to generate the pattern 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param escapeUnprintable escape unprintable characters if true 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public StringBuffer _generatePattern(StringBuffer result, boolean escapeUnprintable) { 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return _generatePattern(result, escapeUnprintable, true); 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Generate and append a string representation of this set to result. 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This does not use this.pat, the cleaned up copy of the string 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * passed to applyPattern(). 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param includeStrings if false, doesn't include the strings. 
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public StringBuffer _generatePattern(StringBuffer result, 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean escapeUnprintable, boolean includeStrings) { 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return appendNewPattern(result, escapeUnprintable, includeStrings); 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private <T extends Appendable> T appendNewPattern( 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller T result, boolean escapeUnprintable, boolean includeStrings) { 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('['); 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = getRangeCount(); 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the set contains at least 2 intervals and includes both 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // MIN_VALUE and MAX_VALUE, then the inverse representation will 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // be more economical. 
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (count > 1 && 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getRangeStart(0) == MIN_VALUE && 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getRangeEnd(count-1) == MAX_VALUE) { 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Emit the inverse 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('^'); 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 1; i < count; ++i) { 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = getRangeEnd(i-1)+1; 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = getRangeStart(i)-1; 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(result, start, escapeUnprintable); 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start != end) { 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((start+1) != end) { 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('-'); 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(result, end, escapeUnprintable); 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Default; emit the ranges as pairs 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < count; ++i) { 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = getRangeStart(i); 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = getRangeEnd(i); 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(result, start, 
escapeUnprintable); 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start != end) { 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((start+1) != end) { 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('-'); 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(result, end, escapeUnprintable); 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (includeStrings && strings.size() > 0) { 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String s : strings) { 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('{'); 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(result, s, escapeUnprintable); 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('}'); 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(']'); 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the number of elements in this set (its cardinality) 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note than the elements of a set may include both individual 
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * codepoints and strings. 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the number of elements in this set (its cardinality). 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int size() { 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int n = 0; 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = getRangeCount(); 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < count; ++i) { 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n += getRangeEnd(i) - getRangeStart(i) + 1; 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return n + strings.size(); 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns <tt>true</tt> if this set contains no elements. 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return <tt>true</tt> if this set contains no elements. 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isEmpty() { 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return len == 1 && strings.size() == 0; 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implementation of UnicodeMatcher API. 
Returns <tt>true</tt> if 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this set contains any character whose low byte is the given 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * value. This is used by <tt>RuleBasedTransliterator</tt> for 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * indexing. 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 844f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean matchesIndexValue(int v) { 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* The index value v, in the range [0,255], is contained in this set if 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * it is contained in any pair of this set. Pairs either have the high 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * bytes equal, or unequal. If the high bytes are equal, then we have 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * aaxx..aayy, where aa is the high byte. Then v is contained if xx <= 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * v <= yy. If the high bytes are unequal we have aaxx..bbyy, bb>aa. 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Then v is contained if xx <= v || v <= yy. (This is identical to the 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * time zone month containment logic.) 
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<getRangeCount(); ++i) { 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int low = getRangeStart(i); 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int high = getRangeEnd(i); 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((low & ~0xFF) == (high & ~0xFF)) { 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((low & 0xFF) <= v && v <= (high & 0xFF)) { 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) { 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.size() != 0) { 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String s : strings) { 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //if (s.length() == 0) { 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // Empty strings match everything 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return true; 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //} 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(s.length() != 0); // We enforce this elsewhere 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = UTF16.charAt(s, 0); 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((c & 0xFF) == v) { 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implementation of UnicodeMatcher.matches(). Always matches the 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * longest possible multichar string. 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 885f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int matches(Replaceable text, 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] offset, 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit, 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean incremental) { 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (offset[0] == limit) { 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Strings, if any, have length != 0, so we don't worry 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // about them here. If we ever allow zero-length strings 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we much check for them here. 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (contains(UnicodeMatcher.ETHER)) { 896f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return incremental ? 
U_PARTIAL_MATCH : U_MATCH; 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_MISMATCH; 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.size() != 0) { // try strings first 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // might separate forward and backward loops later 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for now they are combined 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO Improve efficiency of this, at least in the forward 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // direction, if not in both. In the forward direction we 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // can assume the strings are sorted. 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean forward = offset[0] < limit; 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // firstChar is the leftmost char to match in the 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // forward direction or the rightmost char to match in 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the reverse direction. 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char firstChar = text.charAt(offset[0]); 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there are multiple strings that can match we 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return the longest match. 
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int highWaterLength = 0; 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String trial : strings) { 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //if (trial.length() == 0) { 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return U_MATCH; // null-string always matches 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //} 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(trial.length() != 0); // We ensure this elsewhere 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = trial.charAt(forward ? 0 : trial.length() - 1); 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Strings are sorted, so we can optimize in the 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // forward direction. 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (forward && c > firstChar) break; 932f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (c != firstChar) continue; 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = matchRest(text, offset[0], limit, trial); 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (incremental) { 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int maxLen = forward ? limit-offset[0] : offset[0]-limit; 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length == maxLen) { 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have successfully matched but only up to limit. 
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_PARTIAL_MATCH; 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length == trial.length()) { 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have successfully matched the whole string. 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length > highWaterLength) { 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller highWaterLength = length; 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In the forward direction we know strings 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are sorted so we can bail early. 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (forward && length < highWaterLength) { 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We've checked all strings without a partial match. 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If we have full matches, return the longest one. 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (highWaterLength != 0) { 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset[0] += forward ? 
highWaterLength : -highWaterLength; 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_MATCH; 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return super.matches(text, offset, limit, incremental); 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the longest match for s in text at the given position. 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If limit > start then match forward from start+1 to limit 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * matching all characters except s.charAt(0). If limit < start, 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * go backward starting from start-1 matching all characters 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * except s.charAt(s.length()-1). This method assumes that the 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * first character, text.charAt(start), matches s, so it does not 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * check it. 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param text the text to match 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start the first character to match. In the forward 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * direction, text.charAt(start) is matched against s.charAt(0). 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In the reverse direction, it is matched against 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * s.charAt(s.length()-1). 
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param limit the limit offset for matching, either last+1 in 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the forward direction, or last-1 in the reverse direction, 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * where last is the index of the last character to match. 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return If part of s matches up to the limit, return |limit - 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * start|. If all of s matches before reaching the limit, return 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * s.length(). If there is a mismatch between s and text, return 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 0 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int matchRest (Replaceable text, int start, int limit, String s) { 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int maxLen; 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int slen = s.length(); 9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < limit) { 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maxLen = limit - start; 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (maxLen > slen) maxLen = slen; 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 1; i < maxLen; ++i) { 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (text.charAt(start + i) != s.charAt(i)) return 0; 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maxLen = start - limit; 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (maxLen > slen) maxLen = slen; 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --slen; // <=> slen = s.length() - 1; 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 
1; i < maxLen; ++i) { 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (text.charAt(start - i) != s.charAt(slen - i)) return 0; 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return maxLen; 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1011f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1. 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 101393cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 1014836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int matchesAt(CharSequence text, int offset) { 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastLen = -1; 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings: 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.size() != 0) { 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char firstChar = text.charAt(offset); 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String trial = null; 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // find the first string starting with firstChar 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Iterator<String> it = strings.iterator(); 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (it.hasNext()) { 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller trial = it.next(); 
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char firstStringChar = trial.charAt(0); 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (firstStringChar < firstChar) continue; 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (firstStringChar > firstChar) break strings; 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // now keep checking string until we get the longest one 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int tempLen = matchesAt(text, offset, trial); 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastLen > tempLen) break strings; 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastLen = tempLen; 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!it.hasNext()) break; 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller trial = it.next(); 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastLen < 2) { 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = UTF16.charAt(text, offset); 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (contains(cp)) lastLen = UTF16.getCharCount(cp); 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return offset+lastLen; 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Does one string contain another, starting at a specific offset? 
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param text text to match 10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param offsetInText offset within that text 10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param substring substring to match at offset in text 10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return -1 if match fails, otherwise other.length() 10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: This method was moved from CollectionUtilities 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int matchesAt(CharSequence text, int offsetInText, CharSequence substring) { 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len = substring.length(); 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int textLength = text.length(); 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (textLength + offsetInText > len) { 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0; 10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j = offsetInText; i < len; ++i, ++j) { 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char pc = substring.charAt(i); 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char tc = text.charAt(j); 10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pc != tc) return -1; 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implementation of UnicodeMatcher API. 
Union the set of all 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters that may be matched by this object into the given 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set. 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param toUnionTo the set into which to union the source characters 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1079f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void addMatchSetTo(UnicodeSet toUnionTo) { 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller toUnionTo.addAll(this); 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the index of the given character within this set, where 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the set is ordered by ascending code point. If the character 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is not in this set, return -1. The inverse of this method is 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>charAt()</code>. 
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return an index from 0..size()-1, or -1 10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int indexOf(int c) { 10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < MIN_VALUE || c > MAX_VALUE) { 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6)); 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0; 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int n = 0; 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = list[i++]; 10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < start) { 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit = list[i++]; 11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < limit) { 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return n + c - start; 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n += limit - start; 11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the character at the given index within this set, where 11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the set is ordered by ascending code point. If the index is 11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * out of range, return -1. 
The inverse of this method is 11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>indexOf()</code>. 11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param index an index from 0..size()-1 11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the character at the given index, or -1. 11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int charAt(int index) { 11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (index >= 0) { 11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // len2 is the largest even integer <= len, that is, it is len 11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for even values and len-1 for odd values. With odd values 11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the last entry is UNICODESET_HIGH. 11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len2 = len & ~1; 11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i < len2;) { 11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = list[i++]; 11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = list[i++] - start; 11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (index < count) { 11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return start + index; 11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index -= count; 11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds the specified range to this set if it is not already 11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller * present. If this set already contains the specified range, 1139bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * the call leaves this set unchanged. If <code>end > start</code> 11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then an empty range is added, leaving the set unchanged. 11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of range to be added 11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to this set. 11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of range to be added 11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to this set. 11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet add(int start, int end) { 11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return add_unchecked(start, end); 11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds all characters in range (uses preferred naming convention). 11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start The index of where to start on adding all characters. 11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end The index of where to end on adding all characters. 
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to this object 11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet addAll(int start, int end) { 11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return add_unchecked(start, end); 11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for internal use, after checkFrozen has been called 11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet add_unchecked(int start, int end) { 11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < MIN_VALUE || start > MAX_VALUE) { 11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); 11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (end < MIN_VALUE || end > MAX_VALUE) { 11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6)); 11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < end) { 11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(range(start, end), 2, 0); 11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (start == end) { 11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(start); 11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // /** 
//     * Format out the inversion list as a string, for debugging.  Uncomment when
    //     * needed.
    //     */
    //    public final String dump() {
    //        StringBuffer buf = new StringBuffer("[");
    //        for (int i=0; i<len; ++i) {
    //            if (i != 0) buf.append(", ");
    //            int c = list[i];
    //            //if (c <= 0x7F && c != '\n' && c != '\r' && c != '\t' && c != ' ') {
    //            //    buf.append((char) c);
    //            //} else {
    //                buf.append("U+").append(Utility.hex(c, (c<0x10000)?4:6));
    //            //}
    //        }
    //        buf.append("]");
    //        return buf.toString();
    //    }

    /**
     * Adds the specified character to this set if it is not already
     * present.  If this set already contains the specified character,
     * the call leaves this set unchanged.
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet add(int c) { 12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return add_unchecked(c); 12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for internal use only, after checkFrozen has been called 12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final UnicodeSet add_unchecked(int c) { 12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < MIN_VALUE || c > MAX_VALUE) { 12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6)); 12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // find smallest i such that c < list[i] 12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if odd, then it is IN the set 12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if even, then it is OUT of the set 12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = findCodePoint(c); 12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // already in set? 
12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((i & 1) != 0) return this; 12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // HIGH is 0x110000 12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(list[len-1] == HIGH); 12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // empty = [HIGH] 12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [start_0, limit_0, start_1, limit_1, HIGH] 12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ^ 12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // list[i] 12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // i == 0 means c is before the first range 12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Is the "list[i]-1" a typo? Even if you pass MAX_VALUE into 12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add_unchecked, the maximum value that "c" will be compared to 12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is "MAX_VALUE-1" meaning that "if (c == MAX_VALUE)" will 12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // never be reached according to this logic. 
12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == list[i]-1) { 12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is before start of next range 12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i] = c; 12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if we touched the HIGH mark, then add a new one 1241f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (c == MAX_VALUE) { 12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureCapacity(len+1); 12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[len++] = HIGH; 12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i > 0 && c == list[i-1]) { 12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // collapse adjacent ranges 12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [..., start_k-1, c, c, limit_k, ..., HIGH] 12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ^ 12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // list[i] 12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, i+1, list, i-1, len-i-1); 12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len -= 2; 12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (i > 0 && c == list[i-1]) { 12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is after end of prior range 12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i-1]++; 12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // no need to chcek for collapse here 12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 
12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // At this point we know the new char is not adjacent to 12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // any existing ranges, and it is not 10FFFF. 12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ^ 12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // list[i] 12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] 12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ^ 12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // list[i] 12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Don't use ensureCapacity() to save on copying. 12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // NOTE: This has no measurable impact on performance, 12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but it might help in some usage patterns. 
12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (len+2 > list.length) { 12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] temp = new int[len + 2 + GROW_EXTRA]; 12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i != 0) System.arraycopy(list, 0, temp, 0, i); 12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, i, temp, i+2, len-i); 12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = temp; 12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, i, list, i+2, len-i); 12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i] = c; 12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i+1] = c+1; 12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len += 2; 12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds the specified multicharacter to this set if it is not already 12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * present. If this set already contains the multicharacter, 12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the call leaves this set unchanged. 
1300bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Thus "ch" => {"ch"} 13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet add(CharSequence s) { 13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = getSingleCP(s); 13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cp < 0) { 13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.add(s.toString()); 13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(cp, cp); 13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility for getting code point from single code point CharSequence. 13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * See the public UTF16.getSingleCodePoint() 13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a code point IF the string consists of a single one. 13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * otherwise returns -1. 
13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s to test 13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int getSingleCP(CharSequence s) { 13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.length() < 1) { 13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); 13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.length() > 2) return -1; 13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.length() == 1) return s.charAt(0); 13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // at this point, len = 2 1332f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int cp = UTF16.charAt(s, 0); 13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cp > 0xFFFF) { // is surrogate pair 13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return cp; 13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1340bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} 13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If this set already any particular character, it has no effect on that character. 
13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet addAll(CharSequence s) { 13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp; 13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cp = UTF16.charAt(s, i); 13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(cp, cp); 13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} 13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If this set already any particular character, it has no effect on that character. 
13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet retainAll(CharSequence s) { 13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return retainAll(fromAll(s)); 13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} 13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If this set already any particular character, it has no effect on that character. 13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet complementAll(CharSequence s) { 13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return complementAll(fromAll(s)); 13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} 13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If this set already any particular character, it has no effect on that character. 
13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet removeAll(CharSequence s) { 13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return removeAll(fromAll(s)); 13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Remove all strings from this UnicodeSet 13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet removeAllStrings() { 13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.size() != 0) { 13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.clear(); 13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1399bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Makes a set from a multicharacter string. 
Thus "ch" => {"ch"} 14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a newly created set containing the given string 14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static UnicodeSet from(CharSequence s) { 14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new UnicodeSet().add(s); 14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1410bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} 14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the source string 14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a newly created set containing the given characters 14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static UnicodeSet fromAll(CharSequence s) { 14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new UnicodeSet().addAll(s); 14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Retain only the elements in this set that are contained in the 1421bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * specified range. If <code>end > start</code> then an empty range is 14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * retained, leaving the set empty. 
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of range to be retained 14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to this set. 14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of range to be retained 14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to this set. 14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet retain(int start, int end) { 14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < MIN_VALUE || start > MAX_VALUE) { 14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); 14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (end < MIN_VALUE || end > MAX_VALUE) { 14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6)); 14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start <= end) { 14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retain(range(start, end), 2, 0); 14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller clear(); 14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Retain the specified character from this set if it is present. 
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Upon return this set will be empty if it did not contain c, or 14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * will only contain c if it did contain c. 14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c the character to be retained 14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet retain(int c) { 14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return retain(c, c); 14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Retain the specified string in this set if it is present. 14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Upon return this set will be empty if it did not contain s, or 14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * will only contain s if it did contain s. 
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param cs the string to be retained 14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet retain(CharSequence cs) { 14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1465f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int cp = getSingleCP(cs); 14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cp < 0) { 14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String s = cs.toString(); 14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isIn = strings.contains(s); 14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isIn && size() == 1) { 14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller clear(); 14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.add(s); 14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retain(cp, cp); 14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Removes the specified range from this set if it is present. 14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The set will not contain the specified range once the call 1484bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * returns. 
If <code>end > start</code> then an empty range is 14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * removed, leaving the set unchanged. 14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of range to be removed 14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from this set. 14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of range to be removed 14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from this set. 14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet remove(int start, int end) { 14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < MIN_VALUE || start > MAX_VALUE) { 14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); 14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (end < MIN_VALUE || end > MAX_VALUE) { 14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6)); 14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start <= end) { 15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retain(range(start, end), 2, 2); 15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Removes the specified character from this set if it is 
present. 15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The set will not contain the specified character once the call 15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returns. 15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c the character to be removed 15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet remove(int c) { 15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return remove(c, c); 15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Removes the specified string from this set if it is present. 15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The set will not contain the specified string once the call 15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returns. 
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the string to be removed 15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet remove(CharSequence s) { 15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = getSingleCP(s); 15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cp < 0) { 15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.remove(s.toString()); 15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller remove(cp, cp); 15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Complements the specified range in this set. Any character in 15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the range will be removed if it is in this set, or will be 1538bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * added if it is not in this set. If <code>end > start</code> 15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then an empty range is complemented, leaving the set unchanged. 15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of range to be removed 15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from this set. 
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of range to be removed 15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from this set. 15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet complement(int start, int end) { 15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < MIN_VALUE || start > MAX_VALUE) { 15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); 15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (end < MIN_VALUE || end > MAX_VALUE) { 15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6)); 15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start <= end) { 15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller xor(range(start, end), 2, 0); 15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Complements the specified character in this set. The character 15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * will be removed if it is in this set, or will be added if it is 15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * not in this set. 
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet complement(int c) { 15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return complement(c, c); 15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is equivalent to 15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>complement(MIN_VALUE, MAX_VALUE)</code>. 15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet complement() { 15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (list[0] == LOW) { 15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, 1, list, 0, len-1); 15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --len; 15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureCapacity(len+1); 15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, 0, list, 1, len); 15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[0] = LOW; 15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++len; 15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Complement the specified string in this set. 
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The set will not contain the specified string once the call 15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returns. 15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the string to complement 15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this object, for chaining 15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final UnicodeSet complement(CharSequence s) { 15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = getSingleCP(s); 16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cp < 0) { 16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String s2 = s.toString(); 16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.contains(s2)) { 16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.remove(s2); 16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.add(s2); 16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller complement(cp, cp); 16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains the given character. 
16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c character to be checked for containment 16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1619f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int c) { 16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < MIN_VALUE || c > MAX_VALUE) { 16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6)); 16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bmpSet != null) { 16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return bmpSet.contains(c); 16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (stringSpan != null) { 16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return stringSpan.contains(c); 16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set i to the index of the start item greater than ch 16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We know we will terminate without length test! 
16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = -1; 16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < list[++i]) break; 16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = findCodePoint(c); 16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((i & 1) != 0); // return true if odd 16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the smallest value i such that c < list[i]. Caller 16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * must ensure that c is a legal value or this method will enter 16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * an infinite loop. This method performs a binary search. 
16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c a character in the range MIN_VALUE..MAX_VALUE 16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * inclusive 16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the smallest integer i in the range 0..len-1, 16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * inclusive, such that c < list[i] 16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final int findCodePoint(int c) { 16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Examples: 16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findCodePoint(c) 16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set list[] c=0 1 3 4 7 8 16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller === ============== =========== 16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [] [110000] 0 0 0 0 0 0 16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller [:all:] [0, 110000] 1 1 1 1 1 1 16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return the smallest i such that c < list[i]. Assume 16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // list[len - 1] == HIGH and that c is legal (0..HIGH-1). 16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < list[0]) return 0; 16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // High runner test. c is often after the last range, so an 16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // initial check for this condition pays off. 
16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (len >= 2 && c >= list[len-2]) return len-1; 16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lo = 0; 16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int hi = len - 1; 16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // invariant: c >= list[lo] 16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // invariant: c < list[hi] 16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = (lo + hi) >>> 1; 16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i == lo) return hi; 16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < list[i]) { 16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hi = i; 16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lo = i; 16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // //---------------------------------------------------------------- 16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // Unrolled binary search 16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // //---------------------------------------------------------------- 16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private int validLen = -1; // validated value of len 16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private int topOfLow; 16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private int topOfHigh; 16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private int power; 16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private int deltaStart; 
16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private void validate() { 16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (len <= 1) { 16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // throw new IllegalArgumentException("list.len==" + len + "; must be >1"); 16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // find greatest power of 2 less than or equal to len 17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for (power = exp2.length-1; power > 0 && exp2[power] > len; power--) {} 17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // assert(exp2[power] <= len); 17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // determine the starting points 17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // topOfLow = exp2[power] - 1; 17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // topOfHigh = len - 1; 17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // deltaStart = exp2[power-1]; 17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // validLen = len; 17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private static final int exp2[] = { 17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x1, 0x2, 0x4, 0x8, 17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x10, 0x20, 0x40, 0x80, 17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x100, 0x200, 0x400, 0x800, 17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x1000, 0x2000, 0x4000, 0x8000, 17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x10000, 0x20000, 0x40000, 0x80000, 
17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x100000, 0x200000, 0x400000, 0x800000, 17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x1000000, 0x2000000, 0x4000000, 0x8000000, 17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0x10000000, 0x20000000 // , 0x40000000 // no unsigned int in Java 17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // }; 17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // /** 17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * Unrolled lowest index GT. 17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // */ 17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private final int leastIndexGT(int searchValue) { 17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (len != validLen) { 17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (len == 1) return 0; 17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // validate(); 17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int temp; 17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // set up initial range to search. Each subrange is a power of two in length 17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int high = searchValue < list[topOfLow] ? 
topOfLow : topOfHigh; 17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // Completely unrolled binary search, folhighing "Programming Pearls" 17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // Each case deliberately falls through to the next 17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // Logically, list[-1] < all_search_values && list[count] > all_search_values 17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // although the values -1 and count are never actually touched. 17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // The bounds at each point are low & high, 17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // where low == high - delta*2 17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // so high - delta is the midpoint 17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // The invariant AFTER each line is that list[low] < searchValue <= list[high] 17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // switch (power) { 17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // //case 31: if (searchValue < list[temp = high-0x40000000]) high = temp; // no unsigned int in Java 17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 30: if (searchValue < list[temp = high-0x20000000]) high = temp; 17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 29: if (searchValue < list[temp = high-0x10000000]) high = temp; 17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 28: if (searchValue < list[temp = high- 0x8000000]) high = temp; 17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 27: if (searchValue < list[temp = high- 0x4000000]) high = temp; 
17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 26: if (searchValue < list[temp = high- 0x2000000]) high = temp; 17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 25: if (searchValue < list[temp = high- 0x1000000]) high = temp; 17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 24: if (searchValue < list[temp = high- 0x800000]) high = temp; 17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 23: if (searchValue < list[temp = high- 0x400000]) high = temp; 17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 22: if (searchValue < list[temp = high- 0x200000]) high = temp; 17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 21: if (searchValue < list[temp = high- 0x100000]) high = temp; 17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 20: if (searchValue < list[temp = high- 0x80000]) high = temp; 17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 19: if (searchValue < list[temp = high- 0x40000]) high = temp; 17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 18: if (searchValue < list[temp = high- 0x20000]) high = temp; 17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 17: if (searchValue < list[temp = high- 0x10000]) high = temp; 17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 16: if (searchValue < list[temp = high- 0x8000]) high = temp; 17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 15: if (searchValue < list[temp = high- 0x4000]) high = temp; 17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 14: if (searchValue < list[temp = high- 0x2000]) high = temp; 17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 13: if (searchValue < list[temp = high- 0x1000]) high = temp; 
17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 12: if (searchValue < list[temp = high- 0x800]) high = temp; 17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 11: if (searchValue < list[temp = high- 0x400]) high = temp; 17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 10: if (searchValue < list[temp = high- 0x200]) high = temp; 17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 9: if (searchValue < list[temp = high- 0x100]) high = temp; 17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 8: if (searchValue < list[temp = high- 0x80]) high = temp; 17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 7: if (searchValue < list[temp = high- 0x40]) high = temp; 17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 6: if (searchValue < list[temp = high- 0x20]) high = temp; 17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 5: if (searchValue < list[temp = high- 0x10]) high = temp; 17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 4: if (searchValue < list[temp = high- 0x8]) high = temp; 17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 3: if (searchValue < list[temp = high- 0x4]) high = temp; 17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 2: if (searchValue < list[temp = high- 0x2]) high = temp; 17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // case 1: if (searchValue < list[temp = high- 0x1]) high = temp; 17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return high; 17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 
17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // For debugging only 17942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // public int len() { 17952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return len; 17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // //---------------------------------------------------------------- 17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // //---------------------------------------------------------------- 18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains every character 18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the given range. 18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of the range 18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of the range 18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int start, int end) { 18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < MIN_VALUE || start > MAX_VALUE) { 18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); 18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (end < MIN_VALUE || end > MAX_VALUE) { 18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6)); 18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
18152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //int i = -1; 18162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //while (true) { 18172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (start < list[++i]) break; 18182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //} 18192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = findCodePoint(start); 18202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((i & 1) != 0 && end < list[i]); 18212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 18242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns <tt>true</tt> if this set contains the given 18252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * multicharacter string. 18262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s string to be checked for containment 18272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return <tt>true</tt> if this set contains the specified string 18282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean contains(CharSequence s) { 18302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = getSingleCP(s); 18322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cp < 0) { 18332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return strings.contains(s.toString()); 18342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return contains(cp); 18362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 18402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains 
all the characters and strings 18412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the given set. 18422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param b set to be checked for containment 18432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 18442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean containsAll(UnicodeSet b) { 18462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The specified set is a subset if all of its pairs are contained in 18472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // this set. This implementation accesses the lists directly for speed. 18482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: this could be faster if size() were cached. But that would affect building speed 18492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so it needs investigation. 18502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] listB = b.list; 18512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean needA = true; 18522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean needB = true; 18532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int aPtr = 0; 18542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bPtr = 0; 18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int aLen = len - 1; 18562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bLen = b.len - 1; 18572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int startA = 0, startB = 0, limitA = 0, limitB = 0; 18582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 18592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // double iterations are such a pain... 18602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (needA) { 18612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (aPtr >= aLen) { 18622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ran out of A. 
If B is also exhausted, then break; 18632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (needB && bPtr >= bLen) { 18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 18672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startA = list[aPtr++]; 18692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limitA = list[aPtr++]; 18702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (needB) { 18722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bPtr >= bLen) { 18732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ran out of B. Since we got this far, we have an A and we are ok so far 18742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 18752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startB = listB[bPtr++]; 18772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limitB = listB[bPtr++]; 18782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if B doesn't overlap and is greater than A, get new A 18802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startB >= limitA) { 18812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needA = true; 18822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needB = false; 18832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 18842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if B is wholy contained in A, then get a new B 18862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startB >= startA && limitB <= limitA) { 18872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needA = false; 18882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needB = 
true; 18892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 18902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // all other combinations mean we fail 18922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 18932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!strings.containsAll(b.strings)) return false; 18962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 18972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // /** 19002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * Returns true if this set contains all the characters and strings 19012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * of the given set. 19022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * @param c set to be checked for containment 19032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * @return true if the test condition is met 19042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * @stable ICU 2.0 19052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // */ 19062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // public boolean containsAllOld(UnicodeSet c) { 19072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // The specified set is a subset if all of its pairs are contained in 19082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // this set. It's possible to code this more efficiently in terms of 19092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // direct manipulation of the inversion lists if the need arises. 
19102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int n = c.getRangeCount(); 19112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for (int i=0; i<n; ++i) { 19122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) { 19132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return false; 19142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 19152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 19162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (!strings.containsAll(c.strings)) return false; 19172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return true; 19182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 19192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 19212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if there is a partition of the string such that this set contains each of the partitioned strings. 
19222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For example, for the Unicode set [a{bc}{cd}]<br> 19232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * containsAll is true for each of: "a", "bc", ""cdbca"<br> 19242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * containsAll is false for each of: "acb", "bcda", "bcx"<br> 19252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s string containing characters to be checked for containment 19262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 19272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean containsAll(String s) { 19292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp; 19302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 19312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cp = UTF16.charAt(s, i); 19322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!contains(cp)) { 19332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.size() == 0) { 19342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 19352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return containsAll(s, 0); 19372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 19402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 19432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Recursive routine called if we fail to find a match in containsAll, and there are strings 19442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s source string 19452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 
@param i point to match to the end on 19462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if ok 19472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean containsAll(String s, int i) { 19492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i >= s.length()) { 19502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 19512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp= UTF16.charAt(s, i); 19532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (contains(cp) && containsAll(s, i+UTF16.getCharCount(cp))) { 19542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 19552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String setStr : strings) { 19572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.startsWith(setStr, i) && containsAll(s, i+setStr.length())) { 19582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 19592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 19622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 19662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Get the Regex equivalent for this UnicodeSet 19672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return regex pattern equivalent to this UnicodeSet 19682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 
196993cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 1970836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 19712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 19732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String getRegexEquivalent() { 19742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.size() == 0) { 19752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return toString(); 19762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder result = new StringBuilder("(?:"); 19782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendNewPattern(result, true, false); 19792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String s : strings) { 19802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('|'); 19812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(result, s, true); 19822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result.append(")").toString(); 19842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 19872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains none of the characters 19882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the given range. 
19892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of the range 19902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of the range 19912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 19922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean containsNone(int start, int end) { 19942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < MIN_VALUE || start > MAX_VALUE) { 19952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); 19962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (end < MIN_VALUE || end > MAX_VALUE) { 19982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6)); 19992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = -1; 20012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 20022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < list[++i]) break; 20032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((i & 1) == 0 && end < list[i]); 20052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 20082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if none of the characters or strings in this UnicodeSet appears in the string. 
20092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For example, for the Unicode set [a{bc}{cd}]<br> 20102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * containsNone is true for: "xy", "cb"<br> 20112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * containsNone is false for: "a", "bc", "bcd"<br> 20122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param b set to be checked for containment 20132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 20142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 20152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean containsNone(UnicodeSet b) { 20162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The specified set is a subset if some of its pairs overlap with some of this set's pairs. 20172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This implementation accesses the lists directly for speed. 20182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] listB = b.list; 20192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean needA = true; 20202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean needB = true; 20212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int aPtr = 0; 20222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bPtr = 0; 20232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int aLen = len - 1; 20242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bLen = b.len - 1; 20252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int startA = 0, startB = 0, limitA = 0, limitB = 0; 20262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 20272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // double iterations are such a pain... 
20282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (needA) { 20292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (aPtr >= aLen) { 20302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ran out of A: break so we test strings 20312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startA = list[aPtr++]; 20342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limitA = list[aPtr++]; 20352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (needB) { 20372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bPtr >= bLen) { 20382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ran out of B: break so we test strings 20392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startB = listB[bPtr++]; 20422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limitB = listB[bPtr++]; 20432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if B is higher than any part of A, get new A 20452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startB >= limitA) { 20462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needA = true; 20472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needB = false; 20482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 20492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if A is higher than any part of B, get new B 20512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startA >= limitB) { 20522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needA = false; 20532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needB = true; 20542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
continue; 20552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // all other combinations mean we fail 20572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 20582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!SortedSetRelation.hasRelation(strings, SortedSetRelation.DISJOINT, b.strings)) return false; 20612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 20622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // /** 20652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * Returns true if none of the characters or strings in this UnicodeSet appears in the string. 20662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * For example, for the Unicode set [a{bc}{cd}]<br> 20672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * containsNone is true for: "xy", "cb"<br> 20682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * containsNone is false for: "a", "bc", "bcd"<br> 20692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * @param c set to be checked for containment 20702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * @return true if the test condition is met 20712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // * @stable ICU 2.0 20722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // */ 20732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // public boolean containsNoneOld(UnicodeSet c) { 20742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // The specified set is a subset if all of its pairs are contained in 20752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // this set. 
It's possible to code this more efficiently in terms of 20762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // // direct manipulation of the inversion lists if the need arises. 20772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // int n = c.getRangeCount(); 20782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for (int i=0; i<n; ++i) { 20792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) { 20802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return false; 20812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 20822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 20832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if (!SortedSetRelation.hasRelation(strings, SortedSetRelation.DISJOINT, c.strings)) return false; 20842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return true; 20852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 20862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 20882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains none of the characters 20892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the given string. 
20902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s string containing characters to be checked for containment 20912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the test condition is met 20922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 20932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean containsNone(CharSequence s) { 20942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return span(s, SpanCondition.NOT_CONTAINED) == s.length(); 20952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 20982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains one or more of the characters 20992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in the given range. 21002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character, inclusive, of the range 21012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param end last character, inclusive, of the range 21022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the condition is met 21032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean containsSome(int start, int end) { 21052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !containsNone(start, end); 21062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains one or more of the characters 21102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and strings of the given set. 
21112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s set to be checked for containment 21122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the condition is met 21132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean containsSome(UnicodeSet s) { 21152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !containsNone(s); 21162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if this set contains one or more of the characters 21202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the given string. 21212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s string containing characters to be checked for containment 21222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the condition is met 21232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final boolean containsSome(CharSequence s) { 21252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !containsNone(s); 21262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds all of the elements in the specified set to this set if 21312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * they're not already present. This operation effectively 21322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * modifies this set so that its value is the <i>union</i> of the two 21332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * sets. 
The behavior of this operation is unspecified if the specified 21342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * collection is modified while the operation is in progress. 21352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 21362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c set whose elements are to be added to this set. 21372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet addAll(UnicodeSet c) { 21392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 21402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(c.list, c.len, 0); 21412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.addAll(c.strings); 21422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 21432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Retains only the elements in this set that are contained in the 21472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified set. In other words, removes from this set all of 21482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * its elements that are not contained in the specified set. This 21492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * operation effectively modifies this set so that its value is 21502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the <i>intersection</i> of the two sets. 21512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 21522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c set that defines which elements this set will retain. 
21532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet retainAll(UnicodeSet c) { 21552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 21562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retain(c.list, c.len, 0); 21572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.retainAll(c.strings); 21582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 21592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Removes from this set all of its elements that are contained in the 21632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified set. This operation effectively modifies this 21642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set so that its value is the <i>asymmetric set difference</i> of 21652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the two sets. 21662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 21672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c set that defines which elements will be removed from 21682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this set. 
21692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet removeAll(UnicodeSet c) { 21712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 21722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retain(c.list, c.len, 2); 21732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.removeAll(c.strings); 21742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 21752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Complements in this set all elements contained in the specified 21792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set. Any character in the other set will be removed if it is 21802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in this set, or will be added if it is not in this set. 21812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 21822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c set that defines which elements will be complemented from 21832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this set. 
21842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet complementAll(UnicodeSet c) { 21862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 21872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller xor(c.list, c.len, 0); 21882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings); 21892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 21902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Removes all of the elements from this set. This set will be 21942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * empty after this call returns. 21952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet clear() { 21972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 21982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[0] = HIGH; 21992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = 1; 22002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 22012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strings.clear(); 22022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 22032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 22062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Iteration method that returns the number of ranges contained in 22072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this set. 
22082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getRangeStart 22092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getRangeEnd 22102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 22112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getRangeCount() { 22122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return len/2; 22132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 22162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Iteration method that returns the first character in the 22172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified range of this set. 22182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception ArrayIndexOutOfBoundsException if index is outside 22192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the range <code>0..getRangeCount()-1</code> 22202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getRangeCount 22212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getRangeEnd 22222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 22232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getRangeStart(int index) { 22242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return list[index*2]; 22252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 22282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Iteration method that returns the last character in the 22292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified range of this set. 
22302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception ArrayIndexOutOfBoundsException if index is outside 22312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the range <code>0..getRangeCount()-1</code> 22322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getRangeStart 22332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getRangeEnd 22342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 22352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getRangeEnd(int index) { 22362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (list[index*2 + 1] - 1); 22372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 22402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Reallocate this objects internal structures to take up the least 22412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * possible space, without changing this object's value. 
22422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 22432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet compact() { 22442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 22452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (len != list.length) { 22462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] temp = new int[len]; 22472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, 0, temp, 0, len); 22482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = temp; 22492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rangeList = null; 22512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = null; 22522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 22532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 22562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compares the specified object with this set for equality. Returns 22572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <tt>true</tt> if the specified object is also a set, the two sets 22582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * have the same size, and every member of the specified set is 22592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contained in this set (or equivalently, every member of this set is 22602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contained in the specified set). 22612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 22622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param o Object to be compared for equality with this set. 22632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return <tt>true</tt> if the specified Object is equal to this set. 
22642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2265f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 22662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean equals(Object o) { 22672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (o == null) { 22682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 22692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (this == o) { 22712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 22722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 22742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet that = (UnicodeSet) o; 22752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (len != that.len) return false; 22762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < len; ++i) { 22772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (list[i] != that.list[i]) return false; 22782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!strings.equals(that.strings)) return false; 22802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (Exception e) { 22812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 22822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 22842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 22872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the hash code value for this set. 22882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 22892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the hash code value for this set. 
22902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.lang.Object#hashCode() 22912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2292f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 22932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int hashCode() { 22942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result = len; 22952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < len; ++i) { 22962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result *= 1000003; 22972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result += list[i]; 22982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 23002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 23032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return a programmer-readable string representation of this object. 
23042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2305f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 23062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String toString() { 23072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return toPattern(true); 23082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 23112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Implementation: Pattern parsing 23122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 23132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 23152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parses the given pattern, starting at the given position. The character 23162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * at pattern.charAt(pos.getIndex()) must be '[', or the parse fails. 23172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parsing continues until the corresponding closing ']'. If a syntax error 23182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is encountered between the opening and closing brace, the parse fails. 23192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Upon return from a successful parse, the ParsePosition is updated to 23202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * point to the character following the closing ']', and an inversion 23212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * list for the parsed pattern is returned. This method 23222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * calls itself recursively to parse embedded subpatterns. 
23232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 23242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern the string containing the pattern to be parsed. The 23252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * portion of the string from pos.getIndex(), which must be a '[', to the 23262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * corresponding closing ']', is parsed. 23272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos upon entry, the position at which to being parsing. The 23282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character at pattern.charAt(pos.getIndex()) must be a '['. Upon return 23292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * from a successful parse, pos.getIndex() is either the character after the 23302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * closing ']' of the parsed pattern, or pattern.length() if the closing ']' 23312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is the last character of the pattern string. 23322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return an inversion list for the parsed substring 23332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of <code>pattern</code> 23342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception java.lang.IllegalArgumentException if the parse fails. 23352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 
233693cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 2337836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 23382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 23392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 23402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet applyPattern(String pattern, 23412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ParsePosition pos, 23422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SymbolTable symbols, 23432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int options) { 23442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Need to build the pattern in a temporary string because 23462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // _applyPattern calls add() etc., which set pat to empty. 23472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean parsePositionWasNull = pos == null; 23482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (parsePositionWasNull) { 23492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = new ParsePosition(0); 23502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder rebuiltPat = new StringBuilder(); 23532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleCharacterIterator chars = 23542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller new RuleCharacterIterator(pattern, symbols, pos); 23552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPattern(chars, symbols, rebuiltPat, options); 23562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (chars.inVariable()) { 23572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Extra chars in variable value"); 23582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
23592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = rebuiltPat.toString(); 23602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (parsePositionWasNull) { 23612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = pos.getIndex(); 23622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip over trailing whitespace 23642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((options & IGNORE_SPACE) != 0) { 23652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = PatternProps.skipWhiteSpace(pattern, i); 23662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i != pattern.length()) { 23692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Parse of \"" + pattern + 23702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "\" failed at " + i); 23712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 23742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 23752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Add constants to make the applyPattern() code easier to follow. 
23772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2378f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert private static final int LAST0_START = 0, 2379f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert LAST1_RANGE = 1, 23802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST2_SET = 2; 23812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2382f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert private static final int MODE0_NONE = 0, 2383f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert MODE1_INBRACKET = 1, 23842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller MODE2_OUTBRACKET = 2; 23852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2386f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert private static final int SETMODE0_NONE = 0, 2387f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert SETMODE1_UNICODESET = 1, 2388f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert SETMODE2_PROPERTYPAT = 2, 23892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SETMODE3_PREPARSED = 3; 23902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 23912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 23922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse the pattern from the given RuleCharacterIterator. The 23932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * iterator is advanced over the parsed pattern. 23942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param chars iterator over the pattern characters. Upon return 23952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * it will be advanced to the first character after the parsed 23962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern, or the end of the iteration if all characters are 23972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * parsed. 23982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param symbols symbol table to use to parse and dereference 23992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * variables, or null if none. 
24002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param rebuiltPat the pattern that was parsed, rebuilt or 24012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * copied from the input pattern, as appropriate. 24022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param options a bit mask of zero or more of the following: 24032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * IGNORE_SPACE, CASE. 24042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 24052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols, 24062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Appendable rebuiltPat, int options) { 24072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Syntax characters: [ ] ^ - & { } 24092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Recognized special forms for chars, sets: c-c s-s s&s 24112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int opts = RuleCharacterIterator.PARSE_VARIABLES | 24132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleCharacterIterator.PARSE_ESCAPES; 24142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((options & IGNORE_SPACE) != 0) { 24152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller opts |= RuleCharacterIterator.SKIP_WHITESPACE; 24162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 24172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder patBuf = new StringBuilder(), buf = null; 24192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean usePat = false; 24202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet scratch = null; 24212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Object backup = null; 24222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
24232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // mode: 0=before [, 1=between [...], 2=after ] 24242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // lastItem: 0=none, 1=char, 2=set 24252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastItem = LAST0_START, lastChar = 0, mode = MODE0_NONE; 24262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char op = 0; 24272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean invert = false; 24292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller clear(); 24312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String lastString = null; 24322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (mode != MODE2_OUTBRACKET && !chars.atEnd()) { 24342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //Eclipse stated the following is "dead code" 24352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 24362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (false) { 24372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Debugging assertion 24382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!((lastItem == 0 && op == 0) || 24392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (lastItem == 1 && (op == 0 || op == '-')) || 24402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (lastItem == 2 && (op == 0 || op == '-' || op == '&')))) { 24412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException(); 24422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 24432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }*/ 24442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = 0; 24462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean literal = false; 24472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet nested = null; 
24482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // -------- Check for property pattern 24502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed 24522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int setMode = SETMODE0_NONE; 24532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (resemblesPropertyPattern(chars, opts)) { 24542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setMode = SETMODE2_PROPERTYPAT; 24552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 24562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // -------- Parse '[' of opening delimiter OR nested set. 24582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there is a nested set, use `setMode' to define how 24592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the set should be parsed. If the '[' is part of the 24602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // opening delimiter for this pattern, parse special 24612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // strings "[", "[^", "[-", and "[^-". Check for stand-in 24622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // characters representing a nested set in the symbol 24632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // table. 
24642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 24662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Prepare to backup if necessary 24672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backup = chars.getPos(backup); 24682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = chars.next(opts); 24692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = chars.isEscaped(); 24702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '[' && !literal) { 24722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode == MODE1_INBRACKET) { 24732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.setPos(backup); // backup 24742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setMode = SETMODE1_UNICODESET; 24752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 24762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Handle opening '[' delimiter 24772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = MODE1_INBRACKET; 24782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append('['); 24792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backup = chars.getPos(backup); // prepare to backup 24802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = chars.next(opts); 24812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = chars.isEscaped(); 24822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '^' && !literal) { 24832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller invert = true; 24842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append('^'); 24852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backup = chars.getPos(backup); // prepare to backup 24862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = chars.next(opts); 24872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = chars.isEscaped(); 24882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
} 24892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fall through to handle special leading '-'; 24902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // otherwise restart loop for nested [], \p{}, etc. 24912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '-') { 24922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = true; 24932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fall through to handle literal '-' below 24942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 24952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.setPos(backup); // backup 24962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 24972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 24982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 24992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (symbols != null) { 25002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher m = symbols.lookupMatcher(c); // may be null 25012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m != null) { 25022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 25032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nested = (UnicodeSet) m; 25042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setMode = SETMODE3_PREPARSED; 25052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (ClassCastException e) { 25062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Syntax error"); 25072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // -------- Handle a nested set. 
This either is inline in 25132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the pattern or represented by a stand-in that has 25142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // previously been parsed and was looked up in the symbol 25152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // table. 25162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (setMode != SETMODE0_NONE) { 25182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastItem == LAST1_RANGE) { 25192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op != 0) { 25202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Char expected after operator"); 25212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(lastChar, lastChar); 25232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, lastChar, false); 25242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastItem = LAST0_START; 25252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = 0; 25262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op == '-' || op == '&') { 25292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append(op); 25302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (nested == null) { 25332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (scratch == null) scratch = new UnicodeSet(); 25342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nested = scratch; 25352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (setMode) { 25372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case SETMODE1_UNICODESET: 
25382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nested.applyPattern(chars, symbols, patBuf, options); 25392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 25402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case SETMODE2_PROPERTYPAT: 25412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.skipIgnored(opts); 25422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nested.applyPropertyPattern(chars, patBuf, symbols); 25432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 25442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case SETMODE3_PREPARSED: // `nested' already parsed 25452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nested._toPattern(patBuf, false); 25462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 25472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller usePat = true; 25502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode == MODE0_NONE) { 25522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Entire pattern is a category; leave parse loop 25532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set(nested); 25542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = MODE2_OUTBRACKET; 25552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 25562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (op) { 25592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '-': 25602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller removeAll(nested); 25612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 25622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '&': 25632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retainAll(nested); 25642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
break; 25652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0: 25662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addAll(nested); 25672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 25682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = 0; 25712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastItem = LAST2_SET; 25722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 25742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode == MODE0_NONE) { 25772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Missing '['"); 25782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // -------- Parse special (syntax) characters. If the 25812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // current character is not special, or if it is escaped, 25822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // then fall through and handle it below. 
25832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 25842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!literal) { 25852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (c) { 25862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case ']': 25872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastItem == LAST1_RANGE) { 25882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(lastChar, lastChar); 25892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, lastChar, false); 25902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Treat final trailing '-' as a literal 25922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op == '-') { 25932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(op, op); 25942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append(op); 25952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (op == '&') { 25962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Trailing '&'"); 25972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 25982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append(']'); 25992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = MODE2_OUTBRACKET; 26002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 26012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '-': 26022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op == 0) { 26032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastItem != LAST0_START) { 26042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = (char) c; 26052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 26062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (lastString != null) { 26072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = (char) c; 26082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 
26092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 26102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Treat final trailing '-' as a literal 26112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(c, c); 26122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = chars.next(opts); 26132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = chars.isEscaped(); 26142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ']' && !literal) { 26152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append("-]"); 26162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = MODE2_OUTBRACKET; 26172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 26182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "'-' not after char, string, or set"); 26222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 26232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '&': 26242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastItem == LAST2_SET && op == 0) { 26252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = (char) c; 26262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 26272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "'&' not after set"); 26292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 26302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '^': 26312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "'^' not after '['"); 26322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 26332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '{': 26342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op != 0 && op != '-') { 
26352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Missing operand after operator"); 26362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastItem == LAST1_RANGE) { 26382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(lastChar, lastChar); 26392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, lastChar, false); 26402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastItem = LAST0_START; 26422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buf == null) { 26432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf = new StringBuilder(); 26442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 26452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.setLength(0); 26462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean ok = false; 26482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (!chars.atEnd()) { 26492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = chars.next(opts); 26502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = chars.isEscaped(); 26512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '}' && !literal) { 26522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ok = true; 26532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 26542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendCodePoint(buf, c); 26562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buf.length() < 1 || !ok) { 26582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Invalid multicharacter string"); 26592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have new string. 
Add it to set and continue; 26612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we don't need to drop through to the further 26622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // processing 26632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String curString = buf.toString(); 26642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op == '-') { 26652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastSingle = CharSequences.getSingleCodePoint(lastString == null ? "" : lastString); 26662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int curSingle = CharSequences.getSingleCodePoint(curString); 26672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastSingle != Integer.MAX_VALUE && curSingle != Integer.MAX_VALUE) { 26682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(lastSingle,curSingle); 26692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 26702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 26712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringRange.expand(lastString, curString, true, strings); 26722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (Exception e) { 26732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, e.getMessage()); 26742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastString = null; 26772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = 0; 26782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 26792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(curString); 26802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastString = curString; 26812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 26822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append('{'); 26832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, curString, false); 
26842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append('}'); 26852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 26862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case SymbolTable.SYMBOL_REF: 26872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // symbols nosymbols 26882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [a-$] error error (ambiguous) 26892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [a$] anchor anchor 26902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [a-$x] var "x"* literal '$' 26912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [a-$.] error literal '$' 26922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // *We won't get here in the case of var "x" 26932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller backup = chars.getPos(backup); 26942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = chars.next(opts); 26952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller literal = chars.isEscaped(); 26962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean anchor = (c == ']' && !literal); 26972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (symbols == null && !anchor) { 26982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = SymbolTable.SYMBOL_REF; 26992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.setPos(backup); 27002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; // literal '$' 27012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (anchor && op == 0) { 27032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastItem == LAST1_RANGE) { 27042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(lastChar, lastChar); 27052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, lastChar, false); 27062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(UnicodeMatcher.ETHER); 
27082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller usePat = true; 27092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append(SymbolTable.SYMBOL_REF).append(']'); 27102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = MODE2_OUTBRACKET; 27112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 27122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Unquoted '$'"); 27142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 27152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 27162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 27172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // -------- Parse literal characters. This includes both 27212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // escaped chars ("\u4E01") and non-syntax characters 27222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ("a"). 
27232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (lastItem) { 27252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST0_START: 27262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op == '-' && lastString != null) { 27272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Invalid range"); 27282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastItem = LAST1_RANGE; 27302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastChar = c; 27312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastString = null; 27322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 27332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST1_RANGE: 27342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op == '-') { 27352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastString != null) { 27362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Invalid range"); 27372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (lastChar >= c) { 27392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Don't allow redundant (a-a) or empty (b-a) ranges; 27402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // these are most likely typos. 
27412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Invalid range"); 27422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(lastChar, c); 27442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, lastChar, false); 27452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller patBuf.append(op); 27462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, c, false); 27472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastItem = LAST0_START; 27482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller op = 0; 27492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 27502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(lastChar, lastChar); 27512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller _appendToPat(patBuf, lastChar, false); 27522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastChar = c; 27532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 27552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case LAST2_SET: 27562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (op != 0) { 27572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Set expected after operator"); 27582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastChar = c; 27602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastItem = LAST1_RANGE; 27612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 27622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode != MODE2_OUTBRACKET) { 27662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Missing ']'"); 27672ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller } 27682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.skipIgnored(opts); 27702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 27722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Handle global flags (invert, case insensitivity). If this 27732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern should be compiled case-insensitive, then we need 27742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to close over case BEFORE COMPLEMENTING. This makes 27752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * patterns like /[^abc]/i work. 27762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 27772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((options & CASE) != 0) { 27782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller closeOver(CASE); 27792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (invert) { 27812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller complement(); 27822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Use the rebuilt pattern (pat) only if necessary. Prefer the 27852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // generated pattern. 
27862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (usePat) { 27872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller append(rebuiltPat, patBuf.toString()); 27882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 27892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendNewPattern(rebuiltPat, false, true); 27902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static void syntaxError(RuleCharacterIterator chars, String msg) { 27942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Error: " + msg + " at \"" + 27952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Utility.escape(chars.toString()) + 27962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller '"'); 27972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 27982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 27992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 28002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Add the contents of the UnicodeSet (as strings) into a collection. 28012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param target collection to add into 28022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 28032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public <T extends Collection<String>> T addAllTo(T target) { 28042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return addAllTo(this, target); 28052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 28092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Add the contents of the UnicodeSet (as strings) into a collection. 
28102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param target collection to add into 281139fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 28122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 28132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String[] addAllTo(String[] target) { 28142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return addAllTo(this, target); 28152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 28182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Add the contents of the UnicodeSet (as strings) into an array. 281939fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 28202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 28212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String[] toArray(UnicodeSet set) { 28222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return addAllTo(set, new String[set.size()]); 28232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2826f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * Add the contents of the collection (as strings) into this UnicodeSet. 28272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The collection must not contain null. 
28282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param source the collection to add 28292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to this object 28302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 28312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet add(Iterable<?> source) { 28322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return addAll(source); 28332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 28362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Add a collection (as strings) into this UnicodeSet. 28372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Uses standard naming convention. 28382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param source collection to add into 28392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to this object 28402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 28412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet addAll(Iterable<?> source) { 28422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 28432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (Object o : source) { 28442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(o.toString()); 28452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 28472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 28502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Implementation: Utility methods 28512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 
28522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void ensureCapacity(int newLen) { 28542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (newLen <= list.length) return; 2855f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int[] temp = new int[newLen + GROW_EXTRA]; 28562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(list, 0, temp, 0, len); 28572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = temp; 28582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void ensureBufferCapacity(int newLen) { 28612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buffer != null && newLen <= buffer.length) return; 28622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = new int[newLen + GROW_EXTRA]; 28632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 28662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Assumes start <= end. 
28672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 28682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] range(int start, int end) { 28692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (rangeList == null) { 28702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rangeList = new int[] { start, end+1, HIGH }; 28712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 28722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rangeList[0] = start; 28732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rangeList[1] = end+1; 28742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return rangeList; 28762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 28792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Implementation: Fundamental operations 28802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 28812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 0, 3 is normal: x xor y 28832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 1, 2: x xor ~y == x === y 28842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 28852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet xor(int[] other, int otherLen, int polarity) { 28862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureBufferCapacity(len + otherLen); 28872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0, j = 0, k = 0; 28882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int a = list[i++]; 28892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int b; 28902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Based on the call hierarchy, polarity of 1 or 
2 is never used 28912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so the following if statement will not be called. 28922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ///CLOVER:OFF 28932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (polarity == 1 || polarity == 2) { 28942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = LOW; 28952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (other[j] == LOW) { // skip base if already LOW 28962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++j; 28972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j]; 28982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 28992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ///CLOVER:ON 29002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 29012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; 29022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // simplest of all the routines 29042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // sort the values, discarding identicals! 29052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 29062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a < b) { 29072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; 29082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; 29092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (b < a) { 29102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = b; 29112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; 29122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (a != HIGH) { // at this point, a == b 29132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // discard both values! 
29142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; 29152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; 29162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // DONE! 29172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = HIGH; 29182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = k; 29192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 29202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // swap list and buffer 29232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] temp = list; 29242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = buffer; 29252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = temp; 29262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 29272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 29282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 29302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 0 is normal: x union y 29312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 2: x union ~y 29322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 1: ~x union y 29332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 3: ~x union ~y 29342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 29352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet add(int[] other, int otherLen, int polarity) { 29362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureBufferCapacity(len + otherLen); 29372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0, j = 0, k = 0; 29382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int a = list[i++]; 29392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int b = other[j++]; 29402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 
change from xor is that we have to check overlapping pairs 29412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity bit 1 means a is second, bit 2 means b is. 29422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller main: 29432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 29442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (polarity) { 29452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0: // both first; take lower if unequal 29462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a < b) { // take a 29472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Back up over overlapping ranges in buffer[] 29482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (k > 0 && a <= buffer[k-1]) { 29492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Pick latter end value in buffer[] vs. list[] 29502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = max(list[i], buffer[--k]); 29512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 29522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No overlap 29532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; 29542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i]; 29552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i++; // Common if/else code factored out 29572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller polarity ^= 1; 29582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (b < a) { // take b 29592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (k > 0 && b <= buffer[k-1]) { 29602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = max(other[j], buffer[--k]); 29612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 29622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = b; 29632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j]; 29642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
29652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j++; 29662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller polarity ^= 2; 29672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, take a, drop b 29682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 29692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is symmetrical; it doesn't matter if 29702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we backtrack with a or b. - liu 29712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (k > 0 && a <= buffer[k-1]) { 29722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = max(list[i], buffer[--k]); 29732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 29742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No overlap 29752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; 29762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i]; 29772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i++; 29792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller polarity ^= 1; 29802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 29812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 29822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 29832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 3: // both second; take higher if unequal, and drop other 29842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b <= a) { // take a 29852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 29862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; 29872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // take b 29882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b == HIGH) break main; 29892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = b; 29902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
29912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; // factored common code 29922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 29932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 29942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 1: // a second, b first; if b < a, overlap 29952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a < b) { // no overlap, take a 29962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; a = list[i++]; polarity ^= 1; 29972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (b < a) { // OVERLAP, drop b 29982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 29992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, drop both! 30002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 30012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 30052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 2: // a first, b second; if a < b, overlap 30062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b < a) { // no overlap, take b 30072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = b; b = other[j++]; polarity ^= 2; 30082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (a < b) { // OVERLAP, drop a 30092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, drop both! 
30112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 30122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 30162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = HIGH; // terminate 30192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = k; 30202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // swap list and buffer 30212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] temp = list; 30222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = buffer; 30232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = temp; 30242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 30252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 30262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 30282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 0 is normal: x intersect y 30292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 2: x intersect ~y == set-minus 30302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 1: ~x intersect y 30312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity = 3: ~x intersect ~y 30322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 30332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet retain(int[] other, int otherLen, int polarity) { 30342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureBufferCapacity(len + otherLen); 30352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0, j = 0, k = 0; 30362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int a = 
list[i++]; 30372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int b = other[j++]; 30382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // change from xor is that we have to check overlapping pairs 30392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // polarity bit 1 means a is second, bit 2 means b is. 30402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller main: 30412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 30422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (polarity) { 30432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0: // both first; drop the smaller 30442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a < b) { // drop a 30452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (b < a) { // drop b 30472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, take one, drop other 30492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 30502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; a = list[i++]; polarity ^= 1; 30512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 30542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 3: // both second; take lower if unequal 30552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a < b) { // take a 30562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; a = list[i++]; polarity ^= 1; 30572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (b < a) { // take b 30582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = b; b = other[j++]; polarity ^= 2; 30592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, 
take one, drop other 30602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 30612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; a = list[i++]; polarity ^= 1; 30622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 30652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 1: // a second, b first; 30662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a < b) { // NO OVERLAP, drop a 30672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (b < a) { // OVERLAP, take b 30692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = b; b = other[j++]; polarity ^= 2; 30702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, drop both! 30712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 30722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 30762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 2: // a first, b second; if a < b, overlap 30772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b < a) { // no overlap, drop b 30782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (a < b) { // OVERLAP, take a 30802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = a; a = list[i++]; polarity ^= 1; 30812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { // a == b, drop both! 
30822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (a == HIGH) break main; 30832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller a = list[i++]; polarity ^= 1; 30842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = other[j++]; polarity ^= 2; 30852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 30872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[k++] = HIGH; // terminate 30902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = k; 30912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // swap list and buffer 30922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] temp = list; 30932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = buffer; 30942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = temp; 30952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pat = null; 30962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 30972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 30982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 30992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int max(int a, int b) { 31002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (a > b) ? 
a : b; 31012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 31042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Generic filter-based scanning code 31052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 31062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static interface Filter { 31082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean contains(int codePoint); 31092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static class NumericValueFilter implements Filter { 31122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller double value; 31132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller NumericValueFilter(double value) { this.value = value; } 3114f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 31152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int ch) { 31162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UCharacter.getUnicodeNumericValue(ch) == value; 31172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static class GeneralCategoryMaskFilter implements Filter { 31212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int mask; 31222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller GeneralCategoryMaskFilter(int mask) { this.mask = mask; } 3123f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 31242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean 
contains(int ch) { 31252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((1 << UCharacter.getType(ch)) & mask) != 0; 31262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static class IntPropertyFilter implements Filter { 31302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prop; 31312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value; 31322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller IntPropertyFilter(int prop, int value) { 31332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this.prop = prop; 31342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this.value = value; 31352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3136f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 31372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int ch) { 31382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UCharacter.getIntPropertyValue(ch, prop) == value; 31392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static class ScriptExtensionsFilter implements Filter { 31432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int script; 31442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ScriptExtensionsFilter(int script) { this.script = script; } 3145f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 31462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int c) { 31472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UScript.hasScript(c, script); 31482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
31502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // VersionInfo for unassigned characters 31522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0); 31532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static class VersionFilter implements Filter { 31552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller VersionInfo version; 31562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller VersionFilter(VersionInfo version) { this.version = version; } 3157f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 31582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean contains(int ch) { 31592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller VersionInfo v = UCharacter.getAge(ch); 31602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reference comparison ok; VersionInfo caches and reuses 31612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // unique objects. 
3162f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return !Utility.sameObjects(v, NO_VERSION) && 31632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v.compareTo(version) <= 0; 31642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 31672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static synchronized UnicodeSet getInclusions(int src) { 31682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (INCLUSIONS == null) { 31692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT]; 31702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 31712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(INCLUSIONS[src] == null) { 31722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet incl = new UnicodeSet(); 31732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(src) { 31742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_CHAR: 31752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCharacterProperty.INSTANCE.addPropertyStarts(incl); 31762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_PROPSVEC: 31782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl); 31792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_CHAR_AND_PROPSVEC: 31812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCharacterProperty.INSTANCE.addPropertyStarts(incl); 31822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl); 31832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 
UCharacterProperty.SRC_CASE_AND_NORM: 31852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl); 31862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCaseProps.INSTANCE.addPropertyStarts(incl); 31872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_NFC: 31892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl); 31902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_NFKC: 31922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(incl); 31932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_NFKC_CF: 31952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(incl); 31962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 31972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_NFC_CANON_ITER: 31982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(incl); 31992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 32002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_CASE: 32012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCaseProps.INSTANCE.addPropertyStarts(incl); 32022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 32032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UCharacterProperty.SRC_BIDI: 32042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UBiDiProps.INSTANCE.addPropertyStarts(incl); 32052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 32062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 
32072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")"); 32082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller INCLUSIONS[src] = incl; 32102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return INCLUSIONS[src]; 32122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 32152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Generic filter-based scanning code for UCD property UnicodeSets. 32162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 32172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet applyFilter(Filter filter, int src) { 32182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Logically, walk through all Unicode characters, noting the start 32192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and end of each range for which filter.contain(c) is 32202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // true. Add each range to a set. 32212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 32222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // To improve performance, use an inclusions set which 32232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // encodes information about character ranges that are known 32242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to have identical properties. 32252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // getInclusions(src) contains exactly the first characters of 32262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // same-value ranges for the given properties "source". 
32272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller clear(); 32292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int startHasProperty = -1; 32312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet inclusions = getInclusions(src); 32322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limitRange = inclusions.getRangeCount(); 32332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<limitRange; ++j) { 32352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get current range 32362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = inclusions.getRangeStart(j); 32372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = inclusions.getRangeEnd(j); 32382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for all the code points in the range, process 32402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int ch = start; ch <= end; ++ch) { 32412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // only add to the unicodeset on inflection points -- 32422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // where the hasProperty value changes to false 32432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (filter.contains(ch)) { 32442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startHasProperty < 0) { 32452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startHasProperty = ch; 32462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (startHasProperty >= 0) { 32482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(startHasProperty, ch-1); 32492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startHasProperty = -1; 32502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
32512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startHasProperty >= 0) { 32542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(startHasProperty, 0x10FFFF); 32552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 32582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 32622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Remove leading and trailing Pattern_White_Space and compress 32632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * internal Pattern_White_Space to a single space character. 32642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 32652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static String mungeCharName(String source) { 32662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller source = PatternProps.trimWhiteSpace(source); 32672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buf = null; 32682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<source.length(); ++i) { 32692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char ch = source.charAt(i); 32702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (PatternProps.isWhiteSpace(ch)) { 32712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buf == null) { 32722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf = new StringBuilder().append(source, 0, i); 32732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (buf.charAt(buf.length() - 1) == ' ') { 32742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 32752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
32762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch = ' '; // convert to ' ' 32772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buf != null) { 32792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append(ch); 32802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf == null ? source : buf.toString(); 32832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 32842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 32862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Property set API 32872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 32882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 32892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 32902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Modifies this set to contain those code points which have the 32912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * given value for the given binary or enumerated property, as 32922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returned by UCharacter.getIntPropertyValue. Prior contents of 32932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this set are lost. 32942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 32952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param prop a property in the range 32962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UProperty.BIN_START..UProperty.BIN_LIMIT-1 or 32972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UProperty.INT_START..UProperty.INT_LIMIT-1 or. 32982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UProperty.MASK_START..UProperty.MASK_LIMIT-1. 
32992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 33002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param value a value in the range 33012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UCharacter.getIntPropertyMinValue(prop).. 33022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UCharacter.getIntPropertyMaxValue(prop), with one exception. 33032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If prop is UProperty.GENERAL_CATEGORY_MASK, then value should not be 33042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a UCharacter.getType() result, but rather a mask value produced 3305bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * by logically ORing (1 << UCharacter.getType()) values together. 33062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This allows grouped categories such as [:L:] to be represented. 33072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 33082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to this set 33092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 33102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet applyIntPropertyValue(int prop, int value) { 33112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 33122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (prop == UProperty.GENERAL_CATEGORY_MASK) { 33132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR); 33142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (prop == UProperty.SCRIPT_EXTENSIONS) { 33152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyFilter(new ScriptExtensionsFilter(value), UCharacterProperty.SRC_PROPSVEC); 33162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 33172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyFilter(new IntPropertyFilter(prop, value), UCharacterProperty.INSTANCE.getSource(prop)); 
33182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 33202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 33252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Modifies this set to contain those code points which have the 33262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * given value for the given property. Prior contents of this 33272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set are lost. 33282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 33292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param propertyAlias a property alias, either short or long. 33302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The name is matched loosely. See PropertyAliases.txt for names 33312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and a description of loose matching. If the value string is 33322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * empty, then this string is interpreted as either a 33332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * General_Category value alias, a Script value alias, a binary 33342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * property alias, or a special ID. Special IDs are matched 33352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * loosely and correspond to the following sets: 33362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 333706ec6d9c5ca6a2e72ac506c8729e0a31db19e211Andrew Solovay * "ANY" = [\\u0000-\\u0010FFFF], 333806ec6d9c5ca6a2e72ac506c8729e0a31db19e211Andrew Solovay * "ASCII" = [\\u0000-\\u007F]. 33392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 33402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param valueAlias a value alias, either short or long. 
The 33412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * name is matched loosely. See PropertyValueAliases.txt for 33422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * names and a description of loose matching. In addition to 33432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * aliases listed, numeric values and canonical combining classes 33442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * may be expressed numerically, e.g., ("nv", "0.5") or ("ccc", 33452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "220"). The value string may also be empty. 33462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 33472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to this set 33482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 33492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet applyPropertyAlias(String propertyAlias, String valueAlias) { 33502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return applyPropertyAlias(propertyAlias, valueAlias, null); 33512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 33542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Modifies this set to contain those code points which have the 33552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * given value for the given property. Prior contents of this 33562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set are lost. 33572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param propertyAlias A string of the property alias. 33582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param valueAlias A string of the value alias. 33592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param symbols if not null, then symbols are first called to see if a property 33602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is available. If true, then everything else is skipped. 
33612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this set 33622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 33632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet applyPropertyAlias(String propertyAlias, 33642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String valueAlias, SymbolTable symbols) { 33652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 33662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p; 33672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v; 33682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean mustNotBeEmpty = false, invert = false; 33692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (symbols != null 33712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && (symbols instanceof XSymbolTable) 33722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && ((XSymbolTable)symbols).applyPropertyAlias(propertyAlias, valueAlias, this)) { 33732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 33742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (XSYMBOL_TABLE != null) { 33772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (XSYMBOL_TABLE.applyPropertyAlias(propertyAlias, valueAlias, this)) { 33782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 33792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (valueAlias.length() > 0) { 33832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = UCharacter.getPropertyEnum(propertyAlias); 33842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Treat gc as gcm 
33862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p == UProperty.GENERAL_CATEGORY) { 33872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = UProperty.GENERAL_CATEGORY_MASK; 33882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 33892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 33902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT) || 33912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (p >= UProperty.INT_START && p < UProperty.INT_LIMIT) || 33922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (p >= UProperty.MASK_START && p < UProperty.MASK_LIMIT)) { 33932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 33942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = UCharacter.getPropertyValueEnum(p, valueAlias); 33952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IllegalArgumentException e) { 33962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Handle numeric CCC 33972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p == UProperty.CANONICAL_COMBINING_CLASS || 33982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p == UProperty.LEAD_CANONICAL_COMBINING_CLASS || 33992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) { 34002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = Integer.parseInt(PatternProps.trimWhiteSpace(valueAlias)); 34012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the resultant set is empty then the numeric value 34022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // was invalid. 34032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //mustNotBeEmpty = true; 34042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // old code was wrong; anything between 0 and 255 is valid even if unused. 
34052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (v < 0 || v > 255) throw e; 34062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 34072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw e; 34082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 34122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 34132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (p) { 34142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.NUMERIC_VALUE: 34152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 34162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller double value = Double.parseDouble(PatternProps.trimWhiteSpace(valueAlias)); 34172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyFilter(new NumericValueFilter(value), UCharacterProperty.SRC_CHAR); 34182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 34192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.NAME: 34212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 34222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Must munge name, since 34232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UCharacter.charFromName() does not do 34242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 'loose' matching. 
34252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String buf = mungeCharName(valueAlias); 34262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ch = UCharacter.getCharFromExtendedName(buf); 34272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ch == -1) { 34282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid character name"); 34292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller clear(); 34312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add_unchecked(ch); 34322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 34332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.UNICODE_1_NAME: 34352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ICU 49 deprecates the Unicode_1_Name property APIs. 34362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Unicode_1_Name (na1) not supported"); 34372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.AGE: 34382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 34392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Must munge name, since 34402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // VersionInfo.getInstance() does not do 34412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 'loose' matching. 
34422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias)); 34432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC); 34442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 34452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UProperty.SCRIPT_EXTENSIONS: 34472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, valueAlias); 34482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fall through to calling applyIntPropertyValue() 34492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 34502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 34512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // p is a non-binary, non-enumerated property that we 34522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // don't support (yet). 34532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Unsupported property"); 34542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 34582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 34592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // valueAlias is empty. Interpret as General Category, Script, 34602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Binary property, or ANY or ASCII. Upon success, p and v will 34612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // be set. 
34622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UPropertyAliases pnames = UPropertyAliases.INSTANCE; 34632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = UProperty.GENERAL_CATEGORY_MASK; 34642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = pnames.getPropertyValueEnum(p, propertyAlias); 34652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (v == UProperty.UNDEFINED) { 34662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = UProperty.SCRIPT; 34672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = pnames.getPropertyValueEnum(p, propertyAlias); 34682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (v == UProperty.UNDEFINED) { 34692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = pnames.getPropertyEnum(propertyAlias); 34702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p == UProperty.UNDEFINED) { 34712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = -1; 34722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT) { 34742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = 1; 34752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (p == -1) { 34762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (0 == UPropertyAliases.compare(ANY_ID, propertyAlias)) { 34772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set(MIN_VALUE, MAX_VALUE); 34782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 34792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (0 == UPropertyAliases.compare(ASCII_ID, propertyAlias)) { 34802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set(0, 0x7F); 34812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 34822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (0 == UPropertyAliases.compare(ASSIGNED, propertyAlias)) { 34832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [:Assigned:]=[:^Cn:] 
34842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p = UProperty.GENERAL_CATEGORY_MASK; 34852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = (1<<UCharacter.UNASSIGNED); 34862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller invert = true; 34872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 34882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Property name was never matched. 34892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid property alias: " + propertyAlias + "=" + valueAlias); 34902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 34922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Valid propery name, but it isn't binary, so the value 34932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // must be supplied. 34942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Missing property value"); 34952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 34992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyIntPropertyValue(p, v); 35012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(invert) { 35022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller complement(); 35032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mustNotBeEmpty && isEmpty()) { 35062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // mustNotBeEmpty is set to true if an empty set indicates 35072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // invalid input. 
35082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid property value"); 35092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 35122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 35152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Property set patterns 35162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 35172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 35192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return true if the given position, in the given pattern, appears 35202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to be the start of a property set pattern. 
35212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 35222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean resemblesPropertyPattern(String pattern, int pos) { 35232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Patterns are at least 5 characters long 35242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((pos+5) > pattern.length()) { 35252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 35262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for an opening [:, [:^, \p, or \P 35292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return pattern.regionMatches(pos, "[:", 0, 2) || 35302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pattern.regionMatches(true, pos, "\\p", 0, 2) || 35312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pattern.regionMatches(pos, "\\N", 0, 2); 35322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 35352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return true if the given iterator appears to point at a 35362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * property pattern. Regardless of the result, return with the 35372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * iterator unchanged. 35382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param chars iterator over the pattern characters. Upon return 35392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * it will be unchanged. 
35402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param iterOpts RuleCharacterIterator options 35412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 35422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean resemblesPropertyPattern(RuleCharacterIterator chars, 35432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int iterOpts) { 35442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean result = false; 35452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller iterOpts &= ~RuleCharacterIterator.PARSE_ESCAPES; 35462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Object pos = chars.getPos(null); 35472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = chars.next(iterOpts); 35482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '[' || c == '\\') { 35492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int d = chars.next(iterOpts & ~RuleCharacterIterator.SKIP_WHITESPACE); 35502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = (c == '[') ? (d == ':') : 35512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (d == 'N' || d == 'p' || d == 'P'); 35522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.setPos(pos); 35542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 35552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 35582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse the given property pattern at the given parse position. 
35592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param symbols TODO 35602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 35612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private UnicodeSet applyPropertyPattern(String pattern, ParsePosition ppos, SymbolTable symbols) { 35622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pos = ppos.getIndex(); 35632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // On entry, ppos should point to one of the following locations: 35652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Minimum length is 5 characters, e.g. \p{L} 35672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((pos+5) > pattern.length()) { 35682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 35692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean posix = false; // true for [:pat:], false for \p{pat} \P{pat} \N{pat} 35722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isName = false; // true for \N{pat}, o/w false 35732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean invert = false; 35742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for an opening [:, [:^, \p, or \P 35762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pattern.regionMatches(pos, "[:", 0, 2)) { 35772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller posix = true; 35782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = PatternProps.skipWhiteSpace(pattern, (pos+2)); 35792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos < pattern.length() && pattern.charAt(pos) == '^') { 35802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++pos; 35812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller invert = true; 
35822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (pattern.regionMatches(true, pos, "\\p", 0, 2) || 35842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pattern.regionMatches(pos, "\\N", 0, 2)) { 35852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = pattern.charAt(pos+1); 35862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller invert = (c == 'P'); 35872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isName = (c == 'N'); 35882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = PatternProps.skipWhiteSpace(pattern, (pos+2)); 35892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos == pattern.length() || pattern.charAt(pos++) != '{') { 35902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Syntax error; "\p" or "\P" not followed by "{" 35912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 35922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 35942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Open delimiter not seen 35952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 35962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 35972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 35982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for the matching close delimiter, either :] or } 35992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int close = pattern.indexOf(posix ? ":]" : "}", pos); 36002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (close < 0) { 36012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Syntax error; close delimiter missing 36022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 36032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look for an '=' sign. 
If this is present, we will parse a 36062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // medium \p{gc=Cf} or long \p{GeneralCategory=Format} 36072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // pattern. 36082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int equals = pattern.indexOf('=', pos); 36092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String propName, valueName; 36102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (equals >= 0 && equals < close && !isName) { 36112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Equals seen; parse medium/long pattern 36122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller propName = pattern.substring(pos, equals); 36132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller valueName = pattern.substring(equals+1, close); 36142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 36172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Handle case where no '=' is seen, and \N{} 36182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller propName = pattern.substring(pos, close); 36192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller valueName = ""; 36202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Handle \N{name} 36222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isName) { 36232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is a little inefficient since it means we have to 36242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // parse "na" back to UProperty.NAME even though we already 36252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // know it's UProperty.NAME. 
If we refactor the API to 36262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // support args of (int, String) then we can remove 36272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "na" and make this a little more efficient. 36282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller valueName = propName; 36292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller propName = "na"; 36302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPropertyAlias(propName, valueName, symbols); 36342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (invert) { 36362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller complement(); 36372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Move to the limit position after the close delimiter 36402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ppos.setIndex(close + (posix ? 2 : 1)); 36412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 36432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 36462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a property pattern. 36472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param chars iterator over the pattern characters. Upon return 36482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * it will be advanced to the first character after the parsed 36492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern, or the end of the iteration if all characters are 36502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * parsed. 
36512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param rebuiltPat the pattern that was parsed, rebuilt or 36522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * copied from the input pattern, as appropriate. 36532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param symbols TODO 36542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 36552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void applyPropertyPattern(RuleCharacterIterator chars, 36562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Appendable rebuiltPat, SymbolTable symbols) { 36572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String patStr = chars.lookahead(); 36582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ParsePosition pos = new ParsePosition(0); 36592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller applyPropertyPattern(patStr, pos, symbols); 36602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos.getIndex() == 0) { 36612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syntaxError(chars, "Invalid property pattern"); 36622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller chars.jumpahead(pos.getIndex()); 36642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller append(rebuiltPat, patStr.substring(0, pos.getIndex())); 36652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 36662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 36682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Case folding API 36692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------- 36702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 36712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 36722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Bitmask for constructor and applyPattern() indicating that 
* white space should be ignored.  If set, ignore Unicode Pattern_White_Space characters,
* unless they are quoted or escaped.  This may be ORed together
* with other selectors.
*/
public static final int IGNORE_SPACE = 1;

/**
 * Bitmask for constructor, applyPattern(), and closeOver()
 * indicating letter case.  This may be ORed together with other
 * selectors.
 *
 * <p>Enable case insensitive matching.  E.g., "[ab]" with this flag
 * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
 * match all except 'a', 'A', 'b', and 'B'. This performs a full
 * closure over case mappings, e.g. U+017F for s.
 *
 * <p>The resulting set is a superset of the input for the code points but
 * not for the strings.
 * It performs a case mapping closure of the code points and adds
 * full case folding strings for the code points, and reduces strings of
 * the original set to their full case folding equivalents.
 *
 * <p>This is designed for case-insensitive matches, for example
 * in regular expressions. The full code point case closure allows checking of
 * an input character directly against the closure set.
 * Strings are matched by comparing the case-folded form from the closure
 * set with an incremental case folding of the string in question.
 *
 * <p>The closure set will also contain single code points if the original
 * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
 * This is not necessary (that is, redundant) for the above matching method
 * but results in the same closure sets regardless of whether the original
 * set contained the code point or a string.
 */
public static final int CASE = 2;

/**
 * Alias for UnicodeSet.CASE, for ease of porting from C++ where ICU4C
 * also has both USET_CASE and USET_CASE_INSENSITIVE (see uset.h).
 * It has the same value as {@link #CASE}.
 * @see #CASE
 */
public static final int CASE_INSENSITIVE = 2;

/**
 * Bitmask for constructor, applyPattern(), and closeOver()
 * indicating letter case.  This may be ORed together with other
 * selectors.
 *
 * <p>Enable case insensitive matching.  E.g., "[ab]" with this flag
 * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
 * match all except 'a', 'A', 'b', and 'B'.  This adds the lower-,
 * title-, and uppercase mappings as well as the case folding
 * of each existing element in the set.
 *
 * <p>Unlike {@link #CASE}, this is not a full case closure: closeOver()
 * adds only the mappings listed above and, for example, does not add
 * long s for regular s, or Kelvin for k.  When both bits are set,
 * closeOver() applies the {@link #CASE} behavior.
37262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 37272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int ADD_CASE_MAPPINGS = 4; 37282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 37292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add the result of a full case mapping to the set 37302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // use str as a temporary string to avoid constructing one 37312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final void addCaseMapping(UnicodeSet set, int result, StringBuilder full) { 37322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(result >= 0) { 37332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(result > UCaseProps.MAX_STRING_LENGTH) { 37342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add a single-code point case mapping 37352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(result); 37362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 37372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add a string case mapping from full with length result 37382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(full.toString()); 37392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller full.setLength(0); 37402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 37412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 37422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // result < 0: the code point mapped to itself, no need to add it 37432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // see UCaseProps 37442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 37452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 37462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 37472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Close this set over the given attribute. 
For the attribute 37482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * CASE, the result is to modify this set so that: 37492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 37502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 1. For each character or string 'a' in this set, all strings 37512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 'b' such that foldCase(a) == foldCase(b) are added to this set. 37522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (For most 'a' that are single characters, 'b' will have 37532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * b.length() == 1.) 37542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 37552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 2. For each string 'e' in the resulting set, if e != 37562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * foldCase(e), 'e' will be removed. 37572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3758bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Example: [aq\u00DF{Bc}{bC}{Fi}] => [aAqQ\u00DF\uFB01{ss}{bc}{fi}] 37592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 37602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (Here foldCase(x) refers to the operation 37612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UCharacter.foldCase(x, true), and a == b actually denotes 37622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a.equals(b), not pointer comparison.) 37632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 37642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param attribute bitmask for attributes to close over. 37652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Currently only the CASE bit is supported. Any undefined bits 37662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * are ignored. 37672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to this set. 
37682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 37692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet closeOver(int attribute) { 37702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 37712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) { 37722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UCaseProps csp = UCaseProps.INSTANCE; 37732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet foldSet = new UnicodeSet(this); 37742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ULocale root = ULocale.ROOT; 37752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 37762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // start with input set to guarantee inclusion 37772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // CASE: remove strings because the strings will actually be reduced (folded); 37782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // therefore, start with no strings and add only those needed 37792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((attribute & CASE) != 0) { 37802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foldSet.strings.clear(); 37812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 37822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 37832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int n = getRangeCount(); 37842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result; 37852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder full = new StringBuilder(); 37862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 37872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<n; ++i) { 37882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = getRangeStart(i); 37892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = getRangeEnd(i); 37902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 37912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((attribute & CASE) 
!= 0) { 37922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // full case closure 37932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int cp=start; cp<=end; ++cp) { 37942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller csp.addCaseClosure(cp, foldSet); 37952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 37962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 37972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add case mappings 37982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (does not add long s for regular s, or Kelvin for k, for example) 37992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int cp=start; cp<=end; ++cp) { 38003ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT); 38012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addCaseMapping(foldSet, result, full); 38022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 38033ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT); 38042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addCaseMapping(foldSet, result, full); 38052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 38063ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT); 38072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addCaseMapping(foldSet, result, full); 38082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 38092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = csp.toFullFolding(cp, full, 0); 38102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addCaseMapping(foldSet, result, full); 38112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!strings.isEmpty()) { 
38152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((attribute & CASE) != 0) { 38162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String s : strings) { 38172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String str = UCharacter.foldCase(s, 0); 38182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!csp.addStringCaseClosure(str, foldSet)) { 38192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foldSet.add(str); // does not map to code points: add the folded string itself 38202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 38232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller BreakIterator bi = BreakIterator.getWordInstance(root); 38242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String str : strings) { 38253ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert // TODO: call lower-level functions 38262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foldSet.add(UCharacter.toLowerCase(root, str)); 38272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foldSet.add(UCharacter.toTitleCase(root, str, bi)); 38282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foldSet.add(UCharacter.toUpperCase(root, str)); 38292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foldSet.add(UCharacter.foldCase(str, 0)); 38302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set(foldSet); 38342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 38362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 38382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 
Internal class for customizing UnicodeSet parsing of properties. 38402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * TODO: extend to allow customizing of codepoint ranges 38412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @author medavis 3842836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 38432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 38442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller abstract public static class XSymbolTable implements SymbolTable { 38452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Default constructor 3847836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 38482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 38492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public XSymbolTable(){} 38502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Supplies default implementation for SymbolTable (no action). 3852836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 38532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3854f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 38552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeMatcher lookupMatcher(int i) { 38562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 38572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 38592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Override the interpretation of the sequence [:propertyName=propertyValue:] (and its negated and Perl-style 38612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * variant). 
The propertyName and propertyValue may be existing Unicode aliases, or may not be. 38622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 38632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This routine will be called whenever the parsing of a UnicodeSet pattern finds such a 38642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * propertyName+propertyValue combination. 3865f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * 38662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param propertyName 38672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the name of the property 38682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param propertyValue 38692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the name of the property value 38702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param result UnicodeSet value to change 38712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a set to which the characters having the propertyName+propertyValue are to be added. 38722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return returns true if the propertyName+propertyValue combination is to be overridden, and the characters 38732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with that property have been added to the UnicodeSet, and returns false if the 38742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * propertyName+propertyValue combination is not recognized (in which case result is unaltered). 
3875836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 38762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 38772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) { 38782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 38792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Supplies default implementation for SymbolTable (no action). 3882836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 38832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3884f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 38852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public char[] lookup(String s) { 38862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 38872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Supplies default implementation for SymbolTable (no action). 
3890836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 38912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3892f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 38932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String parseReference(String text, ParsePosition pos, int limit) { 38942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 38952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 38972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 38982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 38992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Is this frozen, according to the Freezable interface? 3900f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * 39012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return value 39022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3903f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 39042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isFrozen() { 39052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (bmpSet != null || stringSpan != null); 39062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 39092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Freeze this class, according to the Freezable interface. 
3910f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * 39112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this 39122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3913f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 39142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet freeze() { 39152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!isFrozen()) { 39162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Do most of what compact() does before freezing because 39172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compact() will not work when the set is frozen. 39182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA). 39192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Delete buffer first to defragment memory less. 39212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = null; 39222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (list.length > (len + GROW_EXTRA)) { 39232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Make the capacity equal to len or 1. 39242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We don't want to realloc of 0 size. 39252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int capacity = (len == 0) ? 
1 : len; 39262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] oldList = list; 39272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list = new int[capacity]; 39282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = capacity; i-- > 0;) { 39292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list[i] = oldList[i]; 39302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Optimize contains() and span() and similar functions. 39342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!strings.isEmpty()) { 39352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller stringSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), UnicodeSetStringSpan.ALL); 39362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (stringSpan == null || !stringSpan.needsStringSpanUTF16()) { 39382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Optimize for code point spans. 39392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // There are no strings, or 39402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // all strings are irrelevant for span() etc. because 39412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // all of each string's code points are contained in this set. 39422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // However, fully contained strings are relevant for spanAndCount(), 39432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so we create both objects. 
39442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bmpSet = new BMPSet(list, len); 39452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 39482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 39512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Span a string using this UnicodeSet. 39522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. 39532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s The string to be spanned 39542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param spanCondition The span condition 39552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the length of the span 39562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 39572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int span(CharSequence s, SpanCondition spanCondition) { 39582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return span(s, 0, spanCondition); 39592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 39622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Span a string using this UnicodeSet. 39632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If the start index is less than 0, span will start from 0. 39642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If the start index is greater than the string length, span returns the string length. 39652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. 
39662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s The string to be spanned 39672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start The start index that the span begins 39682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param spanCondition The span condition 39692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the string index which ends the span (i.e. exclusive) 39702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 39712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int span(CharSequence s, int start, SpanCondition spanCondition) { 39722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = s.length(); 39732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < 0) { 39742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = 0; 39752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (start >= end) { 39762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return end; 39772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bmpSet != null) { 39792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Frozen set without strings, or no string is relevant for span(). 39802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return bmpSet.span(s, start, spanCondition, null); 39812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (stringSpan != null) { 39832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return stringSpan.span(s, start, spanCondition); 39842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (!strings.isEmpty()) { 39852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int which = spanCondition == SpanCondition.NOT_CONTAINED ? 
UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED 39862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller : UnicodeSetStringSpan.FWD_UTF16_CONTAINED; 39872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which); 39882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strSpan.needsStringSpanUTF16()) { 39892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return strSpan.span(s, start, spanCondition); 39902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return spanCodePointsAndCount(s, start, spanCondition, null); 39942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 39952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 39962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 39972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Same as span() but also counts the smallest number of set elements on any path across the span. 39982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. 39992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param outCount An output-only object (must not be null) for returning the count. 40002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the limit (exclusive end) of the span 40012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 
400293cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4003836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 40042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 40052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 40062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition, OutputInt outCount) { 40072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (outCount == null) { 40082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("outCount must not be null"); 40092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int end = s.length(); 40112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (start < 0) { 40122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = 0; 40132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (start >= end) { 40142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return end; 40152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (stringSpan != null) { 40172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We might also have bmpSet != null, 40182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but fully-contained strings are relevant for counting elements. 
40192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return stringSpan.spanAndCount(s, start, spanCondition, outCount); 40202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (bmpSet != null) { 40212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return bmpSet.span(s, start, spanCondition, outCount); 40222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (!strings.isEmpty()) { 40232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED 40242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller : UnicodeSetStringSpan.FWD_UTF16_CONTAINED; 40252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller which |= UnicodeSetStringSpan.WITH_COUNT; 40262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which); 40272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return strSpan.spanAndCount(s, start, spanCondition, outCount); 40282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 40302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return spanCodePointsAndCount(s, start, spanCondition, outCount); 40312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 40332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int spanCodePointsAndCount(CharSequence s, int start, 40342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SpanCondition spanCondition, OutputInt outCount) { 40352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Pin to 0/1 values. 
40362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED); 40372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 40382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 40392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int next = start; 40402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = s.length(); 40412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = 0; 40422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 40432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = Character.codePointAt(s, next); 40442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (spanContained != contains(c)) { 40452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 40462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++count; 40482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller next += Character.charCount(c); 40492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while (next < length); 40502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (outCount != null) { outCount.value = count; } 40512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return next; 40522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 40542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 40552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Span a string backwards (from the end) using this UnicodeSet. 40562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. 
40572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s The string to be spanned 40582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param spanCondition The span condition 40592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The string index which starts the span (i.e. inclusive). 40602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 40612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int spanBack(CharSequence s, SpanCondition spanCondition) { 40622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return spanBack(s, s.length(), spanCondition); 40632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 40652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 40662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Span a string backwards (from the fromIndex) using this UnicodeSet. 40672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If the fromIndex is less than 0, spanBack will return 0. 40682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If fromIndex is greater than the string length, spanBack will start from the string length. 40692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. 40702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s The string to be spanned 40712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param fromIndex The index of the char (exclusive) that the string should be spanned backwards 40722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param spanCondition The span condition 40732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The string index which starts the span (i.e. inclusive). 
40742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 40752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int spanBack(CharSequence s, int fromIndex, SpanCondition spanCondition) { 40762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fromIndex <= 0) { 40772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 40782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fromIndex > s.length()) { 40802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fromIndex = s.length(); 40812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bmpSet != null) { 40832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Frozen set without strings, or no string is relevant for spanBack(). 40842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return bmpSet.spanBack(s, fromIndex, spanCondition); 40852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (stringSpan != null) { 40872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return stringSpan.spanBack(s, fromIndex, spanCondition); 40882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (!strings.isEmpty()) { 40892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int which = (spanCondition == SpanCondition.NOT_CONTAINED) 40902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ? 
UnicodeSetStringSpan.BACK_UTF16_NOT_CONTAINED 40912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller : UnicodeSetStringSpan.BACK_UTF16_CONTAINED; 40922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which); 40932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strSpan.needsStringSpanUTF16()) { 40942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return strSpan.spanBack(s, fromIndex, spanCondition); 40952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 40972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 40982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Pin to 0/1 values. 40992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED); 41002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 41022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prev = fromIndex; 41032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 41042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = Character.codePointBefore(s, prev); 41052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (spanContained != contains(c)) { 41062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 41072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev -= Character.charCount(c); 41092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while (prev > 0); 41102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return prev; 41112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 41142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Clone a thawed version of this class, according to the Freezable interface. 
41152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the clone, not frozen 41162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4117f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 41182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet cloneAsThawed() { 41192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet result = new UnicodeSet(this); 41202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert !result.isFrozen(); 41212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 41222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // internal function 41252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void checkFrozen() { 41262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isFrozen()) { 41272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException("Attempt to modify frozen object"); 41282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ************************ 41322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Additional methods for integration with Generics and Collections 41332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ************************ 41342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 41362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A struct-like class used for iteration through ranges, for faster iteration than by String. 41372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Read about the restrictions on usage in {@link UnicodeSet#ranges()}. 
41382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 41392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static class EntryRange { 41402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 41412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The starting code point of the range. 41422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 41432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int codepoint; 41442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 41452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ending code point of the range 41462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 41472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int codepointEnd; 41482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller EntryRange() { 41502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 41532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * {@inheritDoc} 41542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 41552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Override 41562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String toString() { 41572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder b = new StringBuilder(); 4158f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return ( 41592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller codepoint == codepointEnd ? 
_appendToPat(b, codepoint, false) 41602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller : _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false)) 41612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller .toString(); 41622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 41662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Provide for faster iteration than by String. Returns an Iterable/Iterator over ranges of code points. 41672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The UnicodeSet must not be altered during the iteration. 41682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The EntryRange instance is the same each time; the contents are just reset. 41692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 41702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Warning: </b>To iterate over the full contents, you have to also iterate over the strings. 41712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 4172f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification. 41732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Do not alter the UnicodeSet while iterating. 
4174f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * 41752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <pre> 41762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * // Sample code 41772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for (EntryRange range : us1.ranges()) { 41782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * // do something with code points between range.codepoint and range.codepointEnd; 41792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * } 41802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for (String s : us1.strings()) { 41812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * // do something with each string; 41822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * } 41832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </pre> 41842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 41852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Iterable<EntryRange> ranges() { 41862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new EntryRangeIterable(); 41872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private class EntryRangeIterable implements Iterable<EntryRange> { 4190f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 41912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Iterator<EntryRange> iterator() { 41922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new EntryRangeIterator(); 41932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 41952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 41962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private class EntryRangeIterator implements Iterator<EntryRange> { 41972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pos; 41982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller EntryRange result = new EntryRange(); 
41992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4200f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 42012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasNext() { 42022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return pos < len-1; 42032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4204f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 42052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public EntryRange next() { 42062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos < len-1) { 42072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.codepoint = list[pos++]; 42082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.codepointEnd = list[pos++]-1; 42092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 42102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new NoSuchElementException(); 42112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 42132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4214f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 42152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void remove() { 42162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException(); 42172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 42202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 42212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 42222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns a string iterator. Uses the same order of iteration as {@link UnicodeSetIterator}. 4223f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification. 
42242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Do not alter the UnicodeSet while iterating. 42252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.util.Set#iterator() 42262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4227f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 42282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Iterator<String> iterator() { 42292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new UnicodeSetIterator2(this); 42302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4232f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // Cover for string iteration. 42332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static class UnicodeSetIterator2 implements Iterator<String> { 42342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Invariants: 42352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // sourceList != null then sourceList[item] is a valid character 42362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // sourceList == null then delegates to stringIterator 42372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] sourceList; 42382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int len; 42392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int item; 42402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int current; 42412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int limit; 42422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private TreeSet<String> sourceStrings; 42432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Iterator<String> stringIterator; 42442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private char[] buffer; 42452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 42462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSetIterator2(UnicodeSet source) { 42472ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller // set according to invariants 42482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller len = source.len - 1; 42492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (len > 0) { 42502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceStrings = source.strings; 42512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceList = source.list; 42522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller current = sourceList[item++]; 42532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = sourceList[item++]; 42542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 42552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller stringIterator = source.strings.iterator(); 42562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceList = null; 42572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 42602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* (non-Javadoc) 42612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.util.Iterator#hasNext() 42622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4263f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 42642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasNext() { 42652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return sourceList != null || stringIterator.hasNext(); 42662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 42682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* (non-Javadoc) 42692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.util.Iterator#next() 42702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4271f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 42722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String next() { 42732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (sourceList == null) { 
42742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return stringIterator.next(); 42752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int codepoint = current++; 42772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we have the codepoint we need, but we may need to adjust the state 42782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (current >= limit) { 42792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (item >= len) { 42802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller stringIterator = sourceStrings.iterator(); 42812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sourceList = null; 42822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 42832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller current = sourceList[item++]; 42842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller limit = sourceList[item++]; 42852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Now return. Single code point is easy 42882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (codepoint <= 0xFFFF) { 42892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return String.valueOf((char)codepoint); 42902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // But Java lacks a valueOfCodePoint, so we handle ourselves for speed 42922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // allocate a buffer the first time, to make conversion faster. 
42932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buffer == null) { 42942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer = new char[2]; 42952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 42962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compute ourselves, to save tests and calls 42972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int offset = codepoint - Character.MIN_SUPPLEMENTARY_CODE_POINT; 42982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[0] = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE); 42992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer[1] = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE); 43002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return String.valueOf(buffer); 43012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* (non-Javadoc) 43042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.util.Iterator#remove() 43052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4306f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 43072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void remove() { 43082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new UnsupportedOperationException(); 4309f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 43102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #containsAll(android.icu.text.UnicodeSet) 43142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public <T extends CharSequence> boolean containsAll(Iterable<T> collection) { 43162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (T o : collection) { 
43172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!contains(o)) { 43182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 43192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 43222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #containsNone(android.icu.text.UnicodeSet) 43262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public <T extends CharSequence> boolean containsNone(Iterable<T> collection) { 43282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (T o : collection) { 43292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (contains(o)) { 43302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 43312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 43342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #containsAll(android.icu.text.UnicodeSet) 43382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final <T extends CharSequence> boolean containsSome(Iterable<T> collection) { 43402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !containsNone(collection); 43412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 
43442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #addAll(android.icu.text.UnicodeSet) 43452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @SuppressWarnings("unchecked") // See ticket #11395, this is safe. 43472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public <T extends CharSequence> UnicodeSet addAll(T... collection) { 43482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 43492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (T str : collection) { 43502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(str); 43512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 43532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #removeAll(android.icu.text.UnicodeSet) 43582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public <T extends CharSequence> UnicodeSet removeAll(Iterable<T> collection) { 43602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 43612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (T o : collection) { 43622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller remove(o); 43632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 43652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #retainAll(android.icu.text.UnicodeSet) 43692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 
43702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public <T extends CharSequence> UnicodeSet retainAll(Iterable<T> collection) { 43712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkFrozen(); 43722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO optimize 43732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet toRetain = new UnicodeSet(); 43742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller toRetain.addAll(collection); 43752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retainAll(toRetain); 43762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 43772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Comparison style enums used by {@link UnicodeSet#compareTo(UnicodeSet, ComparisonStyle)}. 43812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public enum ComparisonStyle { 43832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SHORTER_FIRST, 43862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LEXICOGRAPHIC, 43892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 43912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LONGER_FIRST 43922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 43932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 43942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 43952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compares UnicodeSets, where shorter come first, and otherwise lexigraphically 43962ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller * (according to the comparison of the first characters that differ). 43972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.lang.Comparable#compareTo(java.lang.Object) 43982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4399f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 44002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int compareTo(UnicodeSet o) { 44012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compareTo(o, ComparisonStyle.SHORTER_FIRST); 44022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 44042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compares UnicodeSets, in three different ways. 44052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see java.lang.Comparable#compareTo(java.lang.Object) 44062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 44072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int compareTo(UnicodeSet o, ComparisonStyle style) { 44082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (style != ComparisonStyle.LEXICOGRAPHIC) { 44092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int diff = size() - o.size(); 44102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (diff != 0) { 44112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (diff < 0) == (style == ComparisonStyle.SHORTER_FIRST) ? 
-1 : 1; 44122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result; 44152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; ; ++i) { 44162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (0 != (result = list[i] - o.list[i])) { 44172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if either list ran out, compare to the last string 44182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (list[i] == HIGH) { 44192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (strings.isEmpty()) return 1; 44202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String item = strings.first(); 44212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compare(item, o.list[i]); 44222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (o.list[i] == HIGH) { 44242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (o.strings.isEmpty()) return -1; 44252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String item = o.strings.first(); 4426f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int compareResult = compare(item, list[i]); 4427f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return compareResult > 0 ? -1 : compareResult < 0 ? 1 : 0; // Reverse the order. 44282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // otherwise return the result if even index, or the reversal if not 44302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (i & 1) == 0 ? 
result : -result; 44312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (list[i] == HIGH) { 44332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 44342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compare(strings, o.strings); 44372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 44402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 44412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int compareTo(Iterable<String> other) { 44422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compare(this, other); 44432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 44462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility to compare a string to a code point. 44472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString()) 4448f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * and comparing, but much faster (no object creation). 44492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Actually, there is one difference; a null compares as less. 44502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note that this (=String) order is UTF-16 order -- *not* code point order. 
445139fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 44522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 44532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int compare(CharSequence string, int codePoint) { 44552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return CharSequences.compare(string, codePoint); 44562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 44592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility to compare a string to a code point. 4460f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * Same results as turning the code point into a string and comparing, but much faster (no object creation). 44612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Actually, there is one difference; a null compares as less. 44622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note that this (=String) order is UTF-16 order -- *not* code point order. 446339fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 44642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 44652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int compare(int codePoint, CharSequence string) { 44662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -CharSequences.compare(string, codePoint); 44672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 44712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility to compare two iterables. Warning: the ordering in iterables is important. For Collections that are ordered, 44722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * like Lists, that is expected. 
However, Sets in Java violate Leibniz's law when it comes to iteration. 44732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * That means that sets can't be compared directly with this method, unless they are TreeSets without 44742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (or with the same) comparator. Unfortunately, it is impossible to reliably detect in Java whether subclass of 44752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Collection satisfies the right criteria, so it is left to the user to avoid those circumstances. 447639fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 44772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 44782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T extends Comparable<T>> int compare(Iterable<T> collection1, Iterable<T> collection2) { 44792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compare(collection1.iterator(), collection2.iterator()); 44802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 44812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 44832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility to compare two iterators. Warning: the ordering in iterables is important. For Collections that are ordered, 44842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * like Lists, that is expected. However, Sets in Java violate Leibniz's law when it comes to iteration. 44852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * That means that sets can't be compared directly with this method, unless they are TreeSets without 44862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (or with the same) comparator. Unfortunately, it is impossible to reliably detect in Java whether subclass of 44872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Collection satisfies the right criteria, so it is left to the user to avoid those circumstances. 
44882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 448993cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4490836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 44912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 44922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 44932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T extends Comparable<T>> int compare(Iterator<T> first, Iterator<T> other) { 44942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (true) { 44952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!first.hasNext()) { 44962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return other.hasNext() ? -1 : 0; 44972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (!other.hasNext()) { 44982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 1; 44992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller T item1 = first.next(); 45012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller T item2 = other.next(); 45022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result = item1.compareTo(item2); 45032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (result != 0) { 45042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 45052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 45112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility to compare two collections, optionally by size, and then lexicographically. 
451239fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 45132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T extends Comparable<T>> int compare(Collection<T> collection1, Collection<T> collection2, ComparisonStyle style) { 45152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (style != ComparisonStyle.LEXICOGRAPHIC) { 45162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int diff = collection1.size() - collection2.size(); 45172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (diff != 0) { 45182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (diff < 0) == (style == ComparisonStyle.SHORTER_FIRST) ? -1 : 1; 45192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compare(collection1, collection2); 45222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 45252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility for adding the contents of an iterable to a collection. 
452639fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 45272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T, U extends Collection<T>> U addAllTo(Iterable<T> source, U target) { 45292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (T item : source) { 45302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller target.add(item); 45312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return target; 45332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 45362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility for adding the contents of an iterable to a collection. 453739fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer * @hide unsupported on Android 45382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T> T[] addAllTo(Iterable<T> source, T[] target) { 45402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 0; 45412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (T item : source) { 45422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller target[i++] = item; 45432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return target; 45452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 45482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For iterating through the strings in the set. 
Example: 45492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <pre> 45502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for (String key : myUnicodeSet.strings()) { 45512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * doSomethingWith(key); 45522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * } 45532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </pre> 45542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Collection<String> strings() { 45562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collections.unmodifiableSortedSet(strings); 45572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 45602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE. 45612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 456293cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4563836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 45642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 45662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int getSingleCodePoint(CharSequence s) { 45672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return CharSequences.getSingleCodePoint(s); 45682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 45702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4571f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * Simplify the ranges in a Unicode set by merging any ranges that are only separated by characters in the dontCare set. 
4572f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * For example, the ranges: \\u2E80-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3000-\\u303E change to \\u2E80-\\u303E 45732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if the dontCare set includes unassigned characters (for a particular version of Unicode). 45742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param dontCare Set with the don't-care characters for spanning 45752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the input set, modified 45762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 457793cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4578836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 45792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 45812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeSet addBridges(UnicodeSet dontCare) { 45822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet notInInput = new UnicodeSet(this).complement(); 45832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (UnicodeSetIterator it = new UnicodeSetIterator(notInInput); it.nextRange();) { 45842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (it.codepoint != 0 && it.codepoint != UnicodeSetIterator.IS_STRING && it.codepointEnd != 0x10FFFF && dontCare.contains(it.codepoint,it.codepointEnd)) { 45852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller add(it.codepoint,it.codepointEnd); 45862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 45892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 45902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
45912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 45922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find the first index at or after fromIndex where the UnicodeSet matches at that index. 45932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If findNot is true, then reverse the sense of the match: find the first place where the UnicodeSet doesn't match. 45942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If there is no match, length is returned. 45952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. Use span instead. 459693cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4597836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 45982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 45992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 46002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int findIn(CharSequence value, int fromIndex, boolean findNot) { 46012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //TODO add strings, optimize, using ICU4C algorithms 46022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp; 46032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (; fromIndex < value.length(); fromIndex += UTF16.getCharCount(cp)) { 46042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cp = UTF16.charAt(value, fromIndex); 46052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (contains(cp) != findNot) { 46062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 46072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fromIndex; 46102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 46122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 
46132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find the last index before fromIndex where the UnicodeSet matches at that index. 46142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If findNot is true, then reverse the sense of the match: find the last place where the UnicodeSet doesn't match. 46152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If there is no match, -1 is returned. 46162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * BEFORE index is not in the UnicodeSet. 46172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. Use spanBack instead. 461893cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4619836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 46202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 46212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 46222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int findLastIn(CharSequence value, int fromIndex, boolean findNot) { 46232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //TODO add strings, optimize, using ICU4C algorithms 46242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp; 46252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fromIndex -= 1; 46262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (; fromIndex >= 0; fromIndex -= UTF16.getCharCount(cp)) { 46272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cp = UTF16.charAt(value, fromIndex); 46282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (contains(cp) != findNot) { 46292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 46302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fromIndex < 0 ? 
-1 : fromIndex; 46332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 46352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 46362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Strips code points from source. If matches is true, script all that match <i>this</i>. If matches is false, then strip all that <i>don't</i> match. 46372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param source The source of the CharSequence to strip from. 46382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param matches A boolean to either strip all that matches or don't match with the current UnicodeSet object. 46392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The string after it has been stripped. 46402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. Use replaceFrom. 464193cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4642836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 46432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 46442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 46452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String stripFrom(CharSequence source, boolean matches) { 46462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder result = new StringBuilder(); 46472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int pos = 0; pos < source.length();) { 46482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int inside = findIn(source, pos, !matches); 46492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(source.subSequence(pos, inside)); 46502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = findIn(source, inside, matches); // get next start 46512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
return result.toString(); 46532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 46542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 46552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 46562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Argument values for whether span() and similar functions continue while the current character is contained vs. 46572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * not contained in the set. 46582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 46592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The functionality is straightforward for sets with only single code points, without strings (which is the common 46602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * case): 46612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <ul> 46622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>CONTAINED and SIMPLE work the same. 46632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>CONTAINED and SIMPLE are inverses of NOT_CONTAINED. 46642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>span() and spanBack() partition any string the 46652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * same way when alternating between span(NOT_CONTAINED) and span(either "contained" condition). 46662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>Using a 46672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * complemented (inverted) set and the opposite span conditions yields the same results. 46682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </ul> 46692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When a set contains multi-code point strings, then these statements may not be true, depending on the strings in 46702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the set (for example, whether they overlap with each other) and the string that is processed. 
For a set with 46712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * strings: 46722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <ul> 46732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>The complement of the set contains the opposite set of code points, but the same set of strings. 46742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Therefore, complementing both the set and the span conditions may yield different results. 46752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>When starting spans 46762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * at different positions in a string (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different 46772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * because a set string may start before the later position. 46782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>span(SIMPLE) may be shorter than 46792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * span(CONTAINED) because it will not recursively try all possible paths. For example, with a set which 46802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains the three strings "xy", "xya" and "ax", span("xyax", CONTAINED) will return 4 but span("xyax", 46812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * SIMPLE) will return 3. span(SIMPLE) will never be longer than span(CONTAINED). 46822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>With either "contained" condition, span() and spanBack() may partition a string in different ways. For example, 46832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with a set which contains the two strings "ab" and "ba", and when processing the string "aba", span() will yield 46842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contained/not-contained boundaries of { 0, 2, 3 } while spanBack() will yield boundaries of { 0, 1, 3 }. 
46852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </ul> 46862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note: If it is important to get the same boundaries whether iterating forward or backward through a string, then 46872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * either only span() should be used and the boundaries cached for backward operation, or an ICU BreakIterator could 46882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * be used. 46892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 46902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note: Unpaired surrogates are treated like surrogate code points. Similarly, set strings match only on code point 46912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * boundaries, never in the middle of a surrogate pair. 46922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 46932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public enum SpanCondition { 46942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 46952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Continues a span() while there is no set element at the current position. 46962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Increments by one code point at a time. 46972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Stops before the first set element (character or string). 46982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (For code points only, this is like while contains(current)==false). 46992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When span() returns, the substring between where it started and the position it returned consists only of 47012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters that are not in the set, and none of its strings overlap with the span. 
47022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 47032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller NOT_CONTAINED, 47042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 47052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 47062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Spans the longest substring that is a concatenation of set elements (characters or strings). 47072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (For characters only, this is like while contains(current)==true). 47082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When span() returns, the substring between where it started and the position it returned consists only of set 47102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * elements (characters or strings) that are in the set. 47112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If a set contains strings, then the span will be the longest substring for which there 47132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * exists at least one non-overlapping concatenation of set elements (characters or strings). 47142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. 47152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (Java/ICU/Perl regex stops at the first match of an OR.) 47162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 47172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CONTAINED, 47182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 47192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 47202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Continues a span() while there is a set element at the current position. 47212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Increments by the longest matching element at each position. 
47222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (For characters only, this is like while contains(current)==true). 47232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When span() returns, the substring between where it started and the position it returned consists only of set 47252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * elements (characters or strings) that are in the set. 47262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If a set only contains single characters, then this is the same as CONTAINED. 47282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If a set contains strings, then the span will be the longest substring with a match at each position with the 47302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * longest single set element (character or string). 47312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Use this span condition together with other longest-match algorithms, such as ICU converters 47332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (ucnv_getUnicodeSet()). 47342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 47352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller SIMPLE, 47362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 47372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 47382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * One more than the last span condition. 
47392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 47402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CONDITION_COUNT 47412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 47422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 47432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 47442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Get the default symbol table. Null means ordinary processing. For internal use only. 47452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the symbol table 47462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 474793cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4748836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 47492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 47502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 47512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static XSymbolTable getDefaultXSymbolTable() { 47522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return XSYMBOL_TABLE; 47532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 47542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 47552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 47562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Set the default symbol table. Null means ordinary processing. For internal use only. Will affect all subsequent parsing 47572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of UnicodeSets. 
47582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 47592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * WARNING: If this function is used with a UnicodeProperty, and the 47602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call 47612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable} 47622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}. 4763f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * 47642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param xSymbolTable the new default symbol table. 47652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 476693cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller * @hide original deprecated declaration 4767836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 47682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 47692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 47702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) { 4771f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated. 47722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller XSYMBOL_TABLE = xSymbolTable; 47732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 47742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 47752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller//eof 4776