12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
6bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Copyright (C) 1996-2016, International Business Machines Corporation and
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved.
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParsePosition;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Collection;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Collections;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.NoSuchElementException;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.TreeSet;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.BMPSet;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Norm2AllModes;
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.PatternProps;
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.RuleCharacterIterator;
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.SortedSetRelation;
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.StringRange;
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UBiDiProps;
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UCaseProps;
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UCharacterProperty;
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UPropertyAliases;
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.UnicodeSetStringSpan;
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Utility;
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.CharSequences;
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UProperty;
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UScript;
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.Freezable;
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException;
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.OutputInt;
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale;
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.VersionInfo;
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A mutable set of Unicode characters and multicharacter strings.
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Objects of this class represent <em>character classes</em> used
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * in regular expressions. A character class specifies a subset of Unicode
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * code points.  Legal code points are U+0000 to U+10FFFF, inclusive.
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note: method freeze() will not only make the set immutable, but
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * also makes important methods much higher performance:
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * contains(c), containsNone(...), span(...), spanBack(...) etc.
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * After the object is frozen, any subsequent call that wants to change
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the object will throw UnsupportedOperationException.
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The UnicodeSet class is not designed to be subclassed.
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><code>UnicodeSet</code> supports two APIs. The first is the
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <em>operand</em> API that allows the caller to modify the value of
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a <code>UnicodeSet</code> object. It conforms to Java 2's
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>java.util.Set</code> interface, although
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>UnicodeSet</code> does not actually implement that
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * interface. All methods of <code>Set</code> are supported, with the
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * modification that they take a character range or single character
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * instead of an <code>Object</code>, and they take a
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * operand API may be thought of in terms of boolean logic: a boolean
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * OR is implemented by <code>add</code>, a boolean AND is implemented
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * by <code>retain</code>, a boolean XOR is implemented by
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>complement</code> taking an argument, and a boolean NOT is
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * implemented by <code>complement</code> with no argument.  In terms
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of traditional set theory function names, <code>add</code> is a
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * union, <code>retain</code> is an intersection, <code>remove</code>
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is an asymmetric difference, and <code>complement</code> with no
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * argument is a set complement with respect to the superset range
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>MIN_VALUE-MAX_VALUE</code>
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The second API is the
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>applyPattern()</code>/<code>toPattern()</code> API from the
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>java.text.Format</code>-derived classes.  Unlike the
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * methods that add characters, add categories, and control the logic
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the set, the method <code>applyPattern()</code> sets all
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * attributes of a <code>UnicodeSet</code> at once, based on a
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * string pattern.
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Pattern syntax</b></p>
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Patterns are accepted by the constructors and the
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>applyPattern()</code> methods and returned by the
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>toPattern()</code> method.  These patterns follow a syntax
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * similar to that employed by version 8 regular expression character
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * classes.  Here are some simple examples:
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <blockquote>
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   <table>
95bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
96bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[]</code></td>
97bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">No characters</td>
98bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     </tr><tr style="vertical-align: top">
99bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[a]</code></td>
100bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">The character 'a'</td>
101bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     </tr><tr style="vertical-align: top">
102bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[ae]</code></td>
103bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">The characters 'a' and 'e'</td>
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <tr>
106bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[a-e]</code></td>
107bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">The characters 'a' through 'e' inclusive, in Unicode code
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       point order</td>
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <tr>
111bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[\\u4E01]</code></td>
112bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">The character U+4E01</td>
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <tr>
115bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[a{ab}{ac}]</code></td>
116bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">The character 'a' and the multicharacter strings &quot;ab&quot; and
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       &quot;ac&quot;</td>
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <tr>
120bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top; horizontal-align: left;"><code>[\p{Lu}]</code></td>
121bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;">All characters in the general category Uppercase Letter</td>
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   </table>
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </blockquote>
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Any character may be preceded by a backslash in order to remove any special
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * meaning.  White space characters, as defined by the Unicode Pattern_White_Space property, are
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ignored, unless they are escaped.
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Property patterns specify a set of characters having a certain
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * property as defined by the Unicode standard.  Both the POSIX-like
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized.  For a
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * complete list of supported property patterns, see the User's Guide
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for UnicodeSet at
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <a href="http://www.icu-project.org/userguide/unicodeSet.html">
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * http://www.icu-project.org/userguide/unicodeSet.html</a>.
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Actual determination of property data is defined by the underlying
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode database as implemented by UCharacter.
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Patterns specify individual characters, ranges of characters, and
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode property sets.  When elements are concatenated, they
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specify their union.  To complement a set, place a '^' immediately
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * after the opening '['.  Property patterns are inverted by modifying
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * their delimiters; "[:^foo:]" and "\P{foo}".  In any other location,
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * '^' has no special meaning.
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>Ranges are indicated by placing a '-' between two
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters, as in "a-z".  This specifies the range of all
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters from the left to the right, in Unicode order.  If the
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * left character is greater than or equal to the
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * right character it is a syntax error.  If a '-' occurs as the first
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character after the opening '[' or '[^', or if it occurs as the
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * last character before the closing ']', then it is taken as a
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * literal.  Thus "[a\\-b]", "[-ab]", and "[ab-]" all indicate the same
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set of three characters, 'a', 'b', and '-'.
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
157bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <p>Sets may be intersected using the '&amp;' operator or the asymmetric
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set difference may be taken using the '-' operator, for example,
159bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * "[[:L:]&amp;[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
160bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * with values less than 4096.  Operators ('&amp;' and '-') have equal
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * precedence and bind left-to-right.  Thus
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * difference; intersection is commutative.
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <table>
167bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[a]</code><td>The set containing 'a'
168bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[a-z]</code><td>The set containing 'a'
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * through 'z' and all letters in between, in Unicode order
170bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[^a-z]</code><td>The set containing
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * all characters but 'a' through 'z',
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
173bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
175bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[[<em>pat1</em>]&amp;[<em>pat2</em>]]</code>
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
177bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The asymmetric difference of sets specified by <em>pat1</em> and
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <em>pat2</em>
180bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[:Lu:] or \p{Lu}</code>
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The set of characters having the specified
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode property; in
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this case, Unicode uppercase letters
184bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * <tr style="vertical-align: top;"><td style="white-space: nowrap;"><code>[:^Lu:] or \P{Lu}</code>
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <td>The set of characters <em>not</em> having the given
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode property
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </table>
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p><b>Formal syntax</b></p>
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <blockquote>
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   <table>
195bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
196bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>pattern :=&nbsp; </code></td>
197bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><code>('[' '^'? item* ']') |
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       property</code></td>
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
200bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
201bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>item :=&nbsp; </code></td>
202bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><code>char | (char '-' char) | pattern-expr<br>
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </code></td>
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
205bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
206bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>pattern-expr :=&nbsp; </code></td>
207bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><code>pattern | pattern-expr pattern |
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       pattern-expr op pattern<br>
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </code></td>
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
211bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
212bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>op :=&nbsp; </code></td>
213bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><code>'&amp;' | '-'<br>
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </code></td>
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
216bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
217bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>special :=&nbsp; </code></td>
218bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><code>'[' | ']' | '-'<br>
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </code></td>
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
221bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
222bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>char :=&nbsp; </code></td>
223bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><em>any character that is not</em><code> special<br>
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       | ('\\' </code><em>any character</em><code>)<br>
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       | ('&#92;u' hex hex hex hex)<br>
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </code></td>
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
228bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *     <tr style="vertical-align: top">
229bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>hex :=&nbsp; </code></td>
230bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><em>any character for which
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </em><code>Character.digit(c, 16)</code><em>
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       returns a non-negative result</em></td>
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <tr>
235bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="white-space: nowrap; vertical-align: top;" align="right"><code>property :=&nbsp; </code></td>
236bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *       <td style="vertical-align: top;"><em>a Unicode property set pattern</em></td>
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   </table>
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   <br>
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   <table border="1">
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     <tr>
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       <td>Legend: <table>
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         <tr>
244bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="white-space: nowrap; vertical-align: top;"><code>a := b</code></td>
245bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="width: 20; vertical-align: top;">&nbsp; </td>
246bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;"><code>a</code> may be replaced by <code>b</code> </td>
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         </tr>
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         <tr>
249bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="white-space: nowrap; vertical-align: top;"><code>a?</code></td>
250bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;"></td>
251bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;">zero or one instance of <code>a</code><br>
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *           </td>
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         </tr>
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         <tr>
255bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="white-space: nowrap; vertical-align: top;"><code>a*</code></td>
256bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;"></td>
257bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;">zero or more instances of <code>a</code><br>
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *           </td>
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         </tr>
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         <tr>
261bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="white-space: nowrap; vertical-align: top;"><code>a | b</code></td>
262bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;"></td>
263bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;">either <code>a</code> or <code>b</code><br>
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *           </td>
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         </tr>
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         <tr>
267bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="white-space: nowrap; vertical-align: top;"><code>'a'</code></td>
268bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;"></td>
269bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin *           <td style="vertical-align: top;">the literal string between the quotes </td>
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *         </tr>
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </table>
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *       </td>
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *     </tr>
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   </table>
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </blockquote>
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To iterate over contents of UnicodeSet, the following are available:
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <ul><li>{@link #ranges()} to iterate through the ranges</li>
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>{@link #strings()} to iterate through the strings</li>
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <li>{@link #iterator()} to iterate through the entire contents in a single loop.
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * That method is, however, not particularly efficient, since it "boxes" each code point into a String.
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </ul>
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All of the above can be used in <b>for</b> loops.
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The {@link android.icu.text.UnicodeSetIterator UnicodeSetIterator} can also be used, but not in <b>for</b> loops.
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @author Alan Liu
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see UnicodeSetIterator
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see UnicodeSetSpanner
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic class UnicodeSet extends UnicodeFilter implements Iterable<String>, Comparable<UnicodeSet>, Freezable<UnicodeSet> {
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constant for the empty set.
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final UnicodeSet EMPTY = new UnicodeSet().freeze();
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constant for the set of all code points. (Since UnicodeSets can include strings, does not include everything that a UnicodeSet can.)
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final UnicodeSet ALL_CODE_POINTS = new UnicodeSet(0, 0x10FFFF).freeze();
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static XSymbolTable XSYMBOL_TABLE = null; // for overriding the function processing
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // 110000 for codepoints
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Minimum value that can be stored in a UnicodeSet.
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MIN_VALUE = LOW;
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Maximum value that can be stored in a UnicodeSet.
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAX_VALUE = HIGH - 1;
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int len;      // length used; list may be longer to minimize reallocs
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] list;   // MUST be terminated with HIGH
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] rangeList; // internal buffer
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] buffer; // internal buffer
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // NOTE: normally the field should be of type SortedSet; but that is missing a public clone!!
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // is not private so that UnicodeSetIterator can get access
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    TreeSet<String> strings = new TreeSet<String>();
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The pattern representation of this set.  This may not be the
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * most economical pattern.  It is the pattern supplied to
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * applyPattern(), with variables substituted and whitespace
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * removed.  For sets constructed without applyPattern(), or
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * modified using the non-pattern API, this string will be null,
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * indicating that toPattern() must generate a pattern
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * representation from the inversion list.
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String pat = null;
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int START_EXTRA = 16;         // initial storage. Must be >= 0
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int GROW_EXTRA = START_EXTRA; // extra amount for growth. Must be >= 0
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Special property set IDs
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String ANY_ID   = "ANY";   // [\u0000-\U0010FFFF]
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String ASCII_ID = "ASCII"; // [\u0000-\u007F]
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String ASSIGNED = "Assigned"; // [:^Cn:]
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A set of all characters _except_ the second through last characters of
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * certain ranges.  These ranges are ranges of characters whose
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * properties are all exactly alike, e.g. CJK Ideographs from
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * U+4E00 to U+9FA5.
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static UnicodeSet INCLUSIONS[] = null;
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private volatile BMPSet bmpSet; // The set is frozen if bmpSet or stringSpan is not null.
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private volatile UnicodeSetStringSpan stringSpan;
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Public API
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs an empty set.
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet() {
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = new int[1 + START_EXTRA];
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list[len++] = HIGH;
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a copy of an existing set.
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(UnicodeSet other) {
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set(other);
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
375bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Constructs a set containing the given range. If <code>end &gt;
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * start</code> then an empty set is created.
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of range
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of range
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(int start, int end) {
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        complement(start, end);
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
387bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Quickly constructs a set from a set of ranges &lt;s0, e0, s1, e1, s2, e2, ..., sn, en&gt;.
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * There must be an even number of integers, and they must be all greater than zero,
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * all less than or equal to Character.MAX_CODE_POINT.
390bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * In each pair (..., si, ei, ...) it must be true that si &lt;= ei
391bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Between adjacent pairs (...ei, sj...), it must be true that ei+1 &lt; sj
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pairs pairs of character representing ranges
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(int... pairs) {
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((pairs.length & 1) != 0) {
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Must have even number of integers");
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = new int[pairs.length + 1]; // don't allocate extra space, because it is likely that this is a fixed set.
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        len = list.length;
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int last = -1; // used to ensure that the results are monotonically increasing.
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0;
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (i < pairs.length) {
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // start of pair
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int start = pairs[i];
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (last >= start) {
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("Must be monotonically increasing.");
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            list[i++] = last = start;
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // end of pair
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int end = pairs[i] + 1;
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (last >= end) {
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("Must be monotonically increasing.");
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            list[i++] = last = end;
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list[i] = HIGH; // terminate
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a set from the given pattern.  See the class description
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for the syntax of the pattern language.  Whitespace is ignored.
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern contains
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a syntax error.
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(String pattern) {
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPattern(pattern, null, null, IGNORE_SPACE);
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a set from the given pattern.  See the class description
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for the syntax of the pattern language.
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ignoreWhitespace if true, ignore Unicode Pattern_White_Space characters
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern contains
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a syntax error.
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(String pattern, boolean ignoreWhitespace) {
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0);
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a set from the given pattern.  See the class description
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for the syntax of the pattern language.
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param options a bitmask indicating which options to apply.
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Valid options are IGNORE_SPACE and CASE.
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern contains
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a syntax error.
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(String pattern, int options) {
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPattern(pattern, null, null, options);
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a set from the given pattern.  See the class description
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for the syntax of the pattern language.
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos on input, the position in pattern at which to start parsing.
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * On output, the position after the last character parsed.
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param symbols a symbol table mapping variables to char[] arrays
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and chars to UnicodeSets
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contains a syntax error.
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols) {
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPattern(pattern, pos, symbols, IGNORE_SPACE);
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructs a set from the given pattern.  See the class description
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for the syntax of the pattern language.
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos on input, the position in pattern at which to start parsing.
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * On output, the position after the last character parsed.
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param symbols a symbol table mapping variables to char[] arrays
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and chars to UnicodeSets
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param options a bitmask indicating which options to apply.
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Valid options are IGNORE_SPACE and CASE.
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contains a syntax error.
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols, int options) {
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPattern(pattern, pos, symbols, options);
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return a new set that is equivalent to this one.
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
496f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Object clone() {
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (isFrozen()) {
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet result = new UnicodeSet(this);
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        result.bmpSet = this.bmpSet;
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        result.stringSpan = this.stringSpan;
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Make this object represent the range <code>start - end</code>.
     * If <code>end &lt; start</code> then this object is set to an
     * empty range.
     *
     * @param start first character in the set, inclusive
     * @param end last character in the set, inclusive
     * @return this object
     */
    public UnicodeSet set(int start, int end) {
        checkFrozen();
        // Empty the set first, then let complement(int, int) add the range.
        clear();
        complement(start, end);
        return this;
    }
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Make this object represent the same set as <code>other</code>.
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param other a <code>UnicodeSet</code> whose value will be
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * copied to this object
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet set(UnicodeSet other) {
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = other.list.clone();
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        len = other.len;
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = other.pat;
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        strings = new TreeSet<String>(other.strings);
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Modifies this set to represent the set specified by the given pattern.
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * See the class description for the syntax of the pattern language.
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Whitespace is ignored.
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contains a syntax error.
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet applyPattern(String pattern) {
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return applyPattern(pattern, null, null, IGNORE_SPACE);
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Modifies this set to represent the set specified by the given pattern,
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * optionally ignoring whitespace.
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * See the class description for the syntax of the pattern language.
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ignoreWhitespace if true then Unicode Pattern_White_Space characters are ignored
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contains a syntax error.
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0);
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Modifies this set to represent the set specified by the given pattern,
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * optionally ignoring whitespace.
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * See the class description for the syntax of the pattern language.
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern a string specifying what characters are in the set
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param options a bitmask indicating which options to apply.
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Valid options are IGNORE_SPACE and CASE.
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the pattern
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contains a syntax error.
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet applyPattern(String pattern, int options) {
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return applyPattern(pattern, null, null, options);
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return true if the given position, in the given pattern, appears
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to be the start of a UnicodeSet pattern.
58139fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static boolean resemblesPattern(String pattern, int pos) {
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((pos+1) < pattern.length() &&
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pattern.charAt(pos) == '[') ||
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                resemblesPropertyPattern(pattern, pos);
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * TODO: create Appendable version of UTF16.append(buf, c),
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * maybe in new class Appendables?
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @throws IOException
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static void appendCodePoint(Appendable app, int c) {
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert 0 <= c && c <= 0x10ffff;
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c <= 0xffff) {
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                app.append((char) c);
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                app.append(UTF16.getLeadSurrogate(c)).append(UTF16.getTrailSurrogate(c));
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * TODO: create class Appendables?
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @throws IOException
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static void append(Appendable app, CharSequence s) {
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            app.append(s);
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append the <code>toPattern()</code> representation of a
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * string to the given <code>Appendable</code>.
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static <T extends Appendable> T _appendToPat(T buf, String s, boolean escapeUnprintable) {
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = s.codePointAt(i);
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            _appendToPat(buf, cp, escapeUnprintable);
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf;
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append the <code>toPattern()</code> representation of a
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character to the given <code>Appendable</code>.
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static <T extends Appendable> T _appendToPat(T buf, int c, boolean escapeUnprintable) {
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (escapeUnprintable && Utility.isUnprintable(c)) {
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Use hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for anything
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // unprintable
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (Utility.escapeUnprintable(buf, c)) {
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return buf;
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Okay to let ':' pass through
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch (c) {
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '[': // SET_OPEN:
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case ']': // SET_CLOSE:
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '-': // HYPHEN:
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '^': // COMPLEMENT:
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '&': // INTERSECTION:
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '\\': //BACKSLASH:
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '{':
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '}':
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '$':
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case ':':
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append('\\');
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            default:
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Escape whitespace
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (PatternProps.isWhiteSpace(c)) {
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.append('\\');
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendCodePoint(buf, c);
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return buf;
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns a string representation of this set.  If the result of
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * calling this function is passed to a UnicodeSet constructor, it
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will produce another set that is equal to this one.
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
678f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String toPattern(boolean escapeUnprintable) {
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (pat != null && !escapeUnprintable) {
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return pat;
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder();
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return _toPattern(result, escapeUnprintable).toString();
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a string representation of this set to result.  This will be
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a cleaned version of the string passed to applyPattern(), if there
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is one.  Otherwise it will be generated.
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private <T extends Appendable> T _toPattern(T result,
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable) {
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (pat == null) {
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return appendNewPattern(result, escapeUnprintable, true);
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!escapeUnprintable) {
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(pat);
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return result;
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean oddNumberOfBackslashes = false;
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i=0; i<pat.length(); ) {
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c = pat.codePointAt(i);
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i += Character.charCount(c);
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (Utility.isUnprintable(c)) {
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // If the unprintable character is preceded by an odd
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // number of backslashes, then it has been escaped
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // and we omit the last backslash.
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    Utility.escapeUnprintable(result, c);
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    oddNumberOfBackslashes = false;
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (!oddNumberOfBackslashes && c == '\\') {
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Temporarily withhold an odd-numbered backslash.
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    oddNumberOfBackslashes = true;
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (oddNumberOfBackslashes) {
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        result.append('\\');
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    appendCodePoint(result, c);
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    oddNumberOfBackslashes = false;
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (oddNumberOfBackslashes) {
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append('\\');
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Generate and append a string representation of this set to result.
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This does not use this.pat, the cleaned up copy of the string
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * passed to applyPattern().
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param result the buffer into which to generate the pattern
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param escapeUnprintable escape unprintable characters if true
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public StringBuffer _generatePattern(StringBuffer result, boolean escapeUnprintable) {
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return _generatePattern(result, escapeUnprintable, true);
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Generate and append a string representation of this set to result.
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This does not use this.pat, the cleaned up copy of the string
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * passed to applyPattern().
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param includeStrings if false, doesn't include the strings.
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public StringBuffer _generatePattern(StringBuffer result,
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable, boolean includeStrings) {
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return appendNewPattern(result, escapeUnprintable, includeStrings);
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private <T extends Appendable> T appendNewPattern(
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            T result, boolean escapeUnprintable, boolean includeStrings) {
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append('[');
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int count = getRangeCount();
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If the set contains at least 2 intervals and includes both
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // MIN_VALUE and MAX_VALUE, then the inverse representation will
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // be more economical.
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (count > 1 &&
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    getRangeStart(0) == MIN_VALUE &&
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    getRangeEnd(count-1) == MAX_VALUE) {
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Emit the inverse
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append('^');
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = 1; i < count; ++i) {
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int start = getRangeEnd(i-1)+1;
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int end = getRangeStart(i)-1;
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(result, start, escapeUnprintable);
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (start != end) {
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if ((start+1) != end) {
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            result.append('-');
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        _appendToPat(result, end, escapeUnprintable);
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Default; emit the ranges as pairs
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = 0; i < count; ++i) {
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int start = getRangeStart(i);
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int end = getRangeEnd(i);
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(result, start, escapeUnprintable);
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (start != end) {
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if ((start+1) != end) {
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            result.append('-');
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        _appendToPat(result, end, escapeUnprintable);
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (includeStrings && strings.size() > 0) {
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (String s : strings) {
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append('{');
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(result, s, escapeUnprintable);
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append('}');
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append(']');
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the number of elements in this set (its cardinality)
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Note than the elements of a set may include both individual
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * codepoints and strings.
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the number of elements in this set (its cardinality).
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int size() {
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int n = 0;
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int count = getRangeCount();
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < count; ++i) {
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n += getRangeEnd(i) - getRangeStart(i) + 1;
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return n + strings.size();
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns <tt>true</tt> if this set contains no elements.
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return <tt>true</tt> if this set contains no elements.
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isEmpty() {
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return len == 1 && strings.size() == 0;
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Implementation of UnicodeMatcher API.  Returns <tt>true</tt> if
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this set contains any character whose low byte is the given
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * value.  This is used by <tt>RuleBasedTransliterator</tt> for
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * indexing.
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
844f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean matchesIndexValue(int v) {
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* The index value v, in the range [0,255], is contained in this set if
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * it is contained in any pair of this set.  Pairs either have the high
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * bytes equal, or unequal.  If the high bytes are equal, then we have
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * aaxx..aayy, where aa is the high byte.  Then v is contained if xx <=
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * v <= yy.  If the high bytes are unequal we have aaxx..bbyy, bb>aa.
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Then v is contained if xx <= v || v <= yy.  (This is identical to the
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * time zone month containment logic.)
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<getRangeCount(); ++i) {
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int low = getRangeStart(i);
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int high = getRangeEnd(i);
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if ((low & ~0xFF) == (high & ~0xFF)) {
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((low & 0xFF) <= v && v <= (high & 0xFF)) {
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return true;
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) {
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (strings.size() != 0) {
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (String s : strings) {
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //if (s.length() == 0) {
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    // Empty strings match everything
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    return true;
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //}
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // assert(s.length() != 0); // We enforce this elsewhere
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c = UTF16.charAt(s, 0);
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((c & 0xFF) == v) {
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return true;
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return false;
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Implementation of UnicodeMatcher.matches().  Always matches the
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * longest possible multichar string.
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
885f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int matches(Replaceable text,
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] offset,
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int limit,
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean incremental) {
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (offset[0] == limit) {
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Strings, if any, have length != 0, so we don't worry
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // about them here.  If we ever allow zero-length strings
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // we much check for them here.
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (contains(UnicodeMatcher.ETHER)) {
896f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                return incremental ? U_PARTIAL_MATCH : U_MATCH;
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return U_MISMATCH;
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (strings.size() != 0) { // try strings first
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // might separate forward and backward loops later
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // for now they are combined
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // TODO Improve efficiency of this, at least in the forward
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // direction, if not in both.  In the forward direction we
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // can assume the strings are sorted.
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                boolean forward = offset[0] < limit;
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // firstChar is the leftmost char to match in the
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // forward direction or the rightmost char to match in
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the reverse direction.
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char firstChar = text.charAt(offset[0]);
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If there are multiple strings that can match we
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // return the longest match.
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int highWaterLength = 0;
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (String trial : strings) {
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //if (trial.length() == 0) {
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //    return U_MATCH; // null-string always matches
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //}
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // assert(trial.length() != 0); // We ensure this elsewhere
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char c = trial.charAt(forward ? 0 : trial.length() - 1);
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Strings are sorted, so we can optimize in the
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // forward direction.
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (forward && c > firstChar) break;
932f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    if (c != firstChar) continue;
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int length = matchRest(text, offset[0], limit, trial);
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (incremental) {
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int maxLen = forward ? limit-offset[0] : offset[0]-limit;
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (length == maxLen) {
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // We have successfully matched but only up to limit.
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return U_PARTIAL_MATCH;
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (length == trial.length()) {
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // We have successfully matched the whole string.
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (length > highWaterLength) {
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            highWaterLength = length;
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // In the forward direction we know strings
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // are sorted so we can bail early.
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (forward && length < highWaterLength) {
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        continue;
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We've checked all strings without a partial match.
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If we have full matches, return the longest one.
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (highWaterLength != 0) {
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    offset[0] += forward ? highWaterLength : -highWaterLength;
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return U_MATCH;
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return super.matches(text, offset, limit, incremental);
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the longest match for s in text at the given position.
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If limit > start then match forward from start+1 to limit
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * matching all characters except s.charAt(0).  If limit < start,
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * go backward starting from start-1 matching all characters
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * except s.charAt(s.length()-1).  This method assumes that the
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * first character, text.charAt(start), matches s, so it does not
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * check it.
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text the text to match
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start the first character to match.  In the forward
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * direction, text.charAt(start) is matched against s.charAt(0).
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * In the reverse direction, it is matched against
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * s.charAt(s.length()-1).
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit the limit offset for matching, either last+1 in
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the forward direction, or last-1 in the reverse direction,
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * where last is the index of the last character to match.
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return If part of s matches up to the limit, return |limit -
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * start|.  If all of s matches before reaching the limit, return
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * s.length().  If there is a mismatch between s and text, return
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 0
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int matchRest (Replaceable text, int start, int limit, String s) {
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int maxLen;
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int slen = s.length();
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < limit) {
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            maxLen = limit - start;
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (maxLen > slen) maxLen = slen;
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i = 1; i < maxLen; ++i) {
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (text.charAt(start + i) != s.charAt(i)) return 0;
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            maxLen = start - limit;
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (maxLen > slen) maxLen = slen;
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            --slen; // <=> slen = s.length() - 1;
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i = 1; i < maxLen; ++i) {
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (text.charAt(start - i) != s.charAt(slen - i)) return 0;
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return maxLen;
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1011f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1.
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
101393cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
1014836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int matchesAt(CharSequence text, int offset) {
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lastLen = -1;
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        strings:
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (strings.size() != 0) {
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char firstChar = text.charAt(offset);
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String trial = null;
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // find the first string starting with firstChar
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Iterator<String> it = strings.iterator();
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (it.hasNext()) {
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    trial = it.next();
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char firstStringChar = trial.charAt(0);
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (firstStringChar < firstChar) continue;
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (firstStringChar > firstChar) break strings;
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // now keep checking string until we get the longest one
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (;;) {
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int tempLen = matchesAt(text, offset, trial);
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (lastLen > tempLen) break strings;
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastLen = tempLen;
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (!it.hasNext()) break;
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    trial = it.next();
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (lastLen < 2) {
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int cp = UTF16.charAt(text, offset);
10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (contains(cp)) lastLen = UTF16.getCharCount(cp);
10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return offset+lastLen;
10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Does one string contain another, starting at a specific offset?
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text text to match
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param offsetInText offset within that text
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param substring substring to match at offset in text
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return -1 if match fails, otherwise other.length()
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Note: This method was moved from CollectionUtilities
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int matchesAt(CharSequence text, int offsetInText, CharSequence substring) {
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int len = substring.length();
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int textLength = text.length();
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (textLength + offsetInText > len) {
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0;
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int j = offsetInText; i < len; ++i, ++j) {
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char pc = substring.charAt(i);
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char tc = text.charAt(j);
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (pc != tc) return -1;
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return i;
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Implementation of UnicodeMatcher API.  Unions the set of all
     * characters that may be matched by this object into the given
     * set.  The whole of this set is handed to
     * <code>toUnionTo.addAll(this)</code>; this set itself is not modified
     * by this method.
     * @param toUnionTo the set into which to union the source characters
     */
    @Override
    public void addMatchSetTo(UnicodeSet toUnionTo) {
        toUnionTo.addAll(this);
    }
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the index of the given character within this set, where
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the set is ordered by ascending code point.  If the character
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is not in this set, return -1.  The inverse of this method is
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>charAt()</code>.
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return an index from 0..size()-1, or -1
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int indexOf(int c) {
10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c < MIN_VALUE || c > MAX_VALUE) {
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0;
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int n = 0;
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int start = list[i++];
10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c < start) {
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int limit = list[i++];
11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c < limit) {
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return n + c - start;
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n += limit - start;
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the character at the given index within this set, where
11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the set is ordered by ascending code point.  If the index is
11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * out of range, return -1.  The inverse of this method is
11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>indexOf()</code>.
11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param index an index from 0..size()-1
11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the character at the given index, or -1.
11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int charAt(int index) {
11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (index >= 0) {
11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // len2 is the largest even integer <= len, that is, it is len
11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // for even values and len-1 for odd values.  With odd values
11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the last entry is UNICODESET_HIGH.
11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len2 = len & ~1;
11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i=0; i < len2;) {
11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int start = list[i++];
11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int count = list[i++] - start;
11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (index < count) {
11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return start + index;
11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index -= count;
11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Adds the specified range to this set if it is not already
11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * present.  If this set already contains the specified range,
1139bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * the call leaves this set unchanged.  If <code>end &gt; start</code>
11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * then an empty range is added, leaving the set unchanged.
11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of range to be added
11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to this set.
11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of range to be added
11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to this set.
11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet add(int start, int end) {
11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return add_unchecked(start, end);
11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Adds all characters in range (uses preferred naming convention).
11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start The index of where to start on adding all characters.
11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end The index of where to end on adding all characters.
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to this object
11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet addAll(int start, int end) {
11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return add_unchecked(start, end);
11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // for internal use, after checkFrozen has been called
11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet add_unchecked(int start, int end) {
11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < MIN_VALUE || start > MAX_VALUE) {
11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (end < MIN_VALUE || end > MAX_VALUE) {
11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < end) {
11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add(range(start, end), 2, 0);
11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (start == end) {
11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add(start);
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    /**
11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * Format out the inversion list as a string, for debugging.  Uncomment when
11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * needed.
11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     */
11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    public final String dump() {
11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        StringBuffer buf = new StringBuffer("[");
11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        for (int i=0; i<len; ++i) {
11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            if (i != 0) buf.append(", ");
11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            int c = list[i];
11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            //if (c <= 0x7F && c != '\n' && c != '\r' && c != '\t' && c != ' ') {
11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            //    buf.append((char) c);
11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            //} else {
11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                buf.append("U+").append(Utility.hex(c, (c<0x10000)?4:6));
11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            //}
11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        }
11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        buf.append("]");
11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        return buf.toString();
11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    }
11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Adds the specified character to this set if it is not already
12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * present.  If this set already contains the specified character,
12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the call leaves this set unchanged.
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet add(int c) {
12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return add_unchecked(c);
12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // for internal use only, after checkFrozen has been called
    //
    // Inserts the single code point c into the inversion list, either by
    // extending a neighbouring range in place or by splicing in a new
    // two-entry range [c, c+1).  Throws IllegalArgumentException when c is
    // outside MIN_VALUE..MAX_VALUE.  Always returns this set.
    private final UnicodeSet add_unchecked(int c) {
        if (c < MIN_VALUE || c > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
        }

        // find smallest i such that c < list[i]
        // if odd, then it is IN the set
        // if even, then it is OUT of the set
        int i = findCodePoint(c);

        // already in set?
        if ((i & 1) != 0) return this;

        // HIGH is 0x110000
        // assert(list[len-1] == HIGH);

        // empty = [HIGH]
        // [start_0, limit_0, start_1, limit_1, HIGH]

        // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
        //                             ^
        //                             list[i]

        // i == 0 means c is before the first range
        // TODO: Is the "list[i]-1" a typo? Even if you pass MAX_VALUE into
        //      add_unchecked, the maximum value that "c" will be compared to
        //      is "MAX_VALUE-1" meaning that "if (c == MAX_VALUE)" will
        //      never be reached according to this logic.
        // NOTE(review): list[i] can be the HIGH marker itself (0x110000 per the
        //      comment above), in which case list[i]-1 is 0x10FFFF.  If MAX_VALUE
        //      is 0x10FFFF the inner branch does look reachable -- confirm
        //      against the declaration of MAX_VALUE before acting on the TODO.
        if (c == list[i]-1) {
            // c is before start of next range: pull that range's start down to c
            list[i] = c;
            // if we touched the HIGH mark, then add a new one
            if (c == MAX_VALUE) {
                ensureCapacity(len+1);
                list[len++] = HIGH;
            }
            if (i > 0 && c == list[i-1]) {
                // collapse adjacent ranges: the previous range's limit and this
                // range's start are now both c, so both entries are dropped by
                // shifting the tail of the list down two slots.

                // [..., start_k-1, c, c, limit_k, ..., HIGH]
                //                     ^
                //                     list[i]
                System.arraycopy(list, i+1, list, i-1, len-i-1);
                len -= 2;
            }
        }

        else if (i > 0 && c == list[i-1]) {
            // c is after end of prior range: push that range's limit up by one
            list[i-1]++;
            // no need to check for collapse here
        }

        else {
            // At this point we know the new char is not adjacent to
            // any existing ranges, and it is not 10FFFF.


            // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
            //                             ^
            //                             list[i]

            // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
            //                             ^
            //                             list[i]

            // Don't use ensureCapacity() to save on copying.
            // NOTE: This has no measurable impact on performance,
            // but it might help in some usage patterns.
            if (len+2 > list.length) {
                // Grow into a fresh array: copy the entries before i unchanged
                // and the entries from i onwards two slots further right.
                int[] temp = new int[len + 2 + GROW_EXTRA];
                if (i != 0) System.arraycopy(list, 0, temp, 0, i);
                System.arraycopy(list, i, temp, i+2, len-i);
                list = temp;
            } else {
                // Enough room: open a two-slot gap at i in place.
                System.arraycopy(list, i, list, i+2, len-i);
            }

            // Fill the gap with the new single-code-point range [c, c+1).
            list[i] = c;
            list[i+1] = c+1;
            len += 2;
        }

        // The set changed; presumably pat caches a pattern string that is now
        // stale -- confirm against the field's declaration.
        pat = null;
        return this;
    }
12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Adds the specified multicharacter to this set if it is not already
12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * present.  If this set already contains the multicharacter,
12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the call leaves this set unchanged.
1300bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Thus "ch" =&gt; {"ch"}
13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet add(CharSequence s) {
13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp = getSingleCP(s);
13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (cp < 0) {
13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strings.add(s.toString());
13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pat = null;
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add_unchecked(cp, cp);
13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility for getting code point from single code point CharSequence.
13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * See the public UTF16.getSingleCodePoint()
13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a code point IF the string consists of a single one.
13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * otherwise returns -1.
13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s to test
13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int getSingleCP(CharSequence s) {
13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (s.length() < 1) {
13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (s.length() > 2) return -1;
13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (s.length() == 1) return s.charAt(0);
13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // at this point, len = 2
1332f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int cp = UTF16.charAt(s, 0);
13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (cp > 0xFFFF) { // is surrogate pair
13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return cp;
13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1340bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Adds each of the characters in this string to the set. Thus "ch" =&gt; {"c", "h"}
13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If this set already any particular character, it has no effect on that character.
13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet addAll(CharSequence s) {
13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(s, i);
13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add_unchecked(cp, cp);
13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If this set already any particular character, it has no effect on that character.
13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet retainAll(CharSequence s) {
13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return retainAll(fromAll(s));
13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If this set already any particular character, it has no effect on that character.
13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet complementAll(CharSequence s) {
13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return complementAll(fromAll(s));
13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If this set already any particular character, it has no effect on that character.
13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet removeAll(CharSequence s) {
13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return removeAll(fromAll(s));
13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Remove all strings from this UnicodeSet
13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet removeAllStrings() {
13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (strings.size() != 0) {
13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strings.clear();
13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pat = null;
13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1399bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Makes a set from a multicharacter string. Thus "ch" =&gt; {"ch"}
14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a newly created set containing the given string
14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static UnicodeSet from(CharSequence s) {
14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return new UnicodeSet().add(s);
14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1410bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Makes a set from each of the characters in the string. Thus "ch" =&gt; {"c", "h"}
14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the source string
14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a newly created set containing the given characters
14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static UnicodeSet fromAll(CharSequence s) {
14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return new UnicodeSet().addAll(s);
14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Retain only the elements in this set that are contained in the
1421bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * specified range.  If <code>end &gt; start</code> then an empty range is
14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * retained, leaving the set empty.
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of range to be retained
14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to this set.
14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of range to be retained
14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to this set.
14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet retain(int start, int end) {
14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < MIN_VALUE || start > MAX_VALUE) {
14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (end < MIN_VALUE || end > MAX_VALUE) {
14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start <= end) {
14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            retain(range(start, end), 2, 0);
14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            clear();
14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Retain the specified character from this set if it is present.
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Upon return this set will be empty if it did not contain c, or
14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will only contain c if it did contain c.
14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c the character to be retained
14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet retain(int c) {
14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return retain(c, c);
14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Retain the specified string in this set if it is present.
14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Upon return this set will be empty if it did not contain s, or
14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will only contain s if it did contain s.
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param cs the string to be retained
14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet retain(CharSequence cs) {
14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1465f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int cp = getSingleCP(cs);
14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (cp < 0) {
14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String s = cs.toString();
14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean isIn = strings.contains(s);
14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (isIn && size() == 1) {
14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return this;
14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            clear();
14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strings.add(s);
14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pat = null;
14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            retain(cp, cp);
14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Removes the specified range from this set if it is present.
14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The set will not contain the specified range once the call
1484bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * returns.  If <code>end &gt; start</code> then an empty range is
14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * removed, leaving the set unchanged.
14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of range to be removed
14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * from this set.
14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of range to be removed
14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * from this set.
14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet remove(int start, int end) {
14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < MIN_VALUE || start > MAX_VALUE) {
14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (end < MIN_VALUE || end > MAX_VALUE) {
14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start <= end) {
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            retain(range(start, end), 2, 2);
15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Removes the specified character from this set if it is present.
15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The set will not contain the specified character once the call
15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returns.
15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c the character to be removed
15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet remove(int c) {
15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return remove(c, c);
15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Removes the specified string from this set if it is present.
15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The set will not contain the specified string once the call
15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returns.
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the string to be removed
15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet remove(CharSequence s) {
15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp = getSingleCP(s);
15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (cp < 0) {
15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strings.remove(s.toString());
15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pat = null;
15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            remove(cp, cp);
15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Complements the specified range in this set.  Any character in
15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the range will be removed if it is in this set, or will be
1538bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * added if it is not in this set.  If <code>end &gt; start</code>
15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * then an empty range is complemented, leaving the set unchanged.
15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of range to be removed
15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * from this set.
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of range to be removed
15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * from this set.
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet complement(int start, int end) {
15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < MIN_VALUE || start > MAX_VALUE) {
15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (end < MIN_VALUE || end > MAX_VALUE) {
15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start <= end) {
15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            xor(range(start, end), 2, 0);
15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = null;
15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Complements the specified character in this set.  The character
15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will be removed if it is in this set, or will be added if it is
15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * not in this set.
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet complement(int c) {
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return complement(c, c);
15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is equivalent to
15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet complement() {
15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (list[0] == LOW) {
15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.arraycopy(list, 1, list, 0, len-1);
15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            --len;
15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ensureCapacity(len+1);
15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.arraycopy(list, 0, list, 1, len);
15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            list[0] = LOW;
15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++len;
15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = null;
15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Complement the specified string in this set.
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The set will not contain the specified string once the call
15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returns.
15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the string to complement
15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this object, for chaining
15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final UnicodeSet complement(CharSequence s) {
15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp = getSingleCP(s);
16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (cp < 0) {
16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String s2 = s.toString();
16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (strings.contains(s2)) {
16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strings.remove(s2);
16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strings.add(s2);
16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pat = null;
16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            complement(cp, cp);
16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains the given character.
16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c character to be checked for containment
16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1619f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean contains(int c) {
16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c < MIN_VALUE || c > MAX_VALUE) {
16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (bmpSet != null) {
16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return bmpSet.contains(c);
16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (stringSpan != null) {
16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return stringSpan.contains(c);
16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set i to the index of the start item greater than ch
16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We know we will terminate without length test!
16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = -1;
16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (true) {
16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c < list[++i]) break;
16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = findCodePoint(c);
16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((i & 1) != 0); // return true if odd
16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the smallest value i such that c < list[i].  Caller
16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * must ensure that c is a legal value or this method will enter
16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * an infinite loop.  This method performs a binary search.
16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c a character in the range MIN_VALUE..MAX_VALUE
16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * inclusive
16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the smallest integer i in the range 0..len-1,
16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * inclusive, such that c < list[i]
16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final int findCodePoint(int c) {
16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Examples:
16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                           findCodePoint(c)
16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           set              list[]         c=0 1 3 4 7 8
16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           ===              ==============   ===========
16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           []               [110000]         0 0 0 0 0 0
16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller           [:all:]          [0, 110000]      1 1 1 1 1 1
16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Return the smallest i such that c < list[i].  Assume
16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c < list[0]) return 0;
16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // High runner test.  c is often after the last range, so an
16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // initial check for this condition pays off.
16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (len >= 2 && c >= list[len-2]) return len-1;
16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lo = 0;
16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int hi = len - 1;
16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // invariant: c >= list[lo]
16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // invariant: c < list[hi]
16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int i = (lo + hi) >>> 1;
16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (i == lo) return hi;
16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c < list[i]) {
16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            hi = i;
16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lo = i;
16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    //----------------------------------------------------------------
16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    // Unrolled binary search
16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    //----------------------------------------------------------------
16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private int validLen = -1; // validated value of len
16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private int topOfLow;
16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private int topOfHigh;
16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private int power;
16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private int deltaStart;
16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private void validate() {
16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        if (len <= 1) {
16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            throw new IllegalArgumentException("list.len==" + len + "; must be >1");
16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        }
17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // find greatest power of 2 less than or equal to len
17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        for (power = exp2.length-1; power > 0 && exp2[power] > len; power--) {}
17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // assert(exp2[power] <= len);
17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // determine the starting points
17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        topOfLow = exp2[power] - 1;
17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        topOfHigh = len - 1;
17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        deltaStart = exp2[power-1];
17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        validLen = len;
17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    }
17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private static final int exp2[] = {
17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x1, 0x2, 0x4, 0x8,
17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x10, 0x20, 0x40, 0x80,
17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x100, 0x200, 0x400, 0x800,
17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x1000, 0x2000, 0x4000, 0x8000,
17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x10000, 0x20000, 0x40000, 0x80000,
17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x100000, 0x200000, 0x400000, 0x800000,
17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x1000000, 0x2000000, 0x4000000, 0x8000000,
17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        0x10000000, 0x20000000 // , 0x40000000 // no unsigned int in Java
17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    };
17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    /**
17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * Unrolled lowest index GT.
17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     */
17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    private final int leastIndexGT(int searchValue) {
17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        if (len != validLen) {
17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            if (len == 1) return 0;
17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            validate();
17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        }
17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        int temp;
17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // set up initial range to search. Each subrange is a power of two in length
17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        int high = searchValue < list[topOfLow] ? topOfLow : topOfHigh;
17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // Completely unrolled binary search, following "Programming Pearls"
17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // Each case deliberately falls through to the next
17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // Logically, list[-1] < all_search_values && list[count] > all_search_values
17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // although the values -1 and count are never actually touched.
17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // The bounds at each point are low & high,
17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // where low == high - delta*2
17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // so high - delta is the midpoint
17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // The invariant AFTER each line is that list[low] < searchValue <= list[high]
17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        switch (power) {
17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        //case 31: if (searchValue < list[temp = high-0x40000000]) high = temp; // no unsigned int in Java
17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 30: if (searchValue < list[temp = high-0x20000000]) high = temp;
17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 29: if (searchValue < list[temp = high-0x10000000]) high = temp;
17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 28: if (searchValue < list[temp = high- 0x8000000]) high = temp;
17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 27: if (searchValue < list[temp = high- 0x4000000]) high = temp;
17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 26: if (searchValue < list[temp = high- 0x2000000]) high = temp;
17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 25: if (searchValue < list[temp = high- 0x1000000]) high = temp;
17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 24: if (searchValue < list[temp = high-  0x800000]) high = temp;
17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 23: if (searchValue < list[temp = high-  0x400000]) high = temp;
17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 22: if (searchValue < list[temp = high-  0x200000]) high = temp;
17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 21: if (searchValue < list[temp = high-  0x100000]) high = temp;
17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 20: if (searchValue < list[temp = high-   0x80000]) high = temp;
17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 19: if (searchValue < list[temp = high-   0x40000]) high = temp;
17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 18: if (searchValue < list[temp = high-   0x20000]) high = temp;
17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 17: if (searchValue < list[temp = high-   0x10000]) high = temp;
17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 16: if (searchValue < list[temp = high-    0x8000]) high = temp;
17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 15: if (searchValue < list[temp = high-    0x4000]) high = temp;
17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 14: if (searchValue < list[temp = high-    0x2000]) high = temp;
17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 13: if (searchValue < list[temp = high-    0x1000]) high = temp;
17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 12: if (searchValue < list[temp = high-     0x800]) high = temp;
17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 11: if (searchValue < list[temp = high-     0x400]) high = temp;
17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case 10: if (searchValue < list[temp = high-     0x200]) high = temp;
17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  9: if (searchValue < list[temp = high-     0x100]) high = temp;
17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  8: if (searchValue < list[temp = high-      0x80]) high = temp;
17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  7: if (searchValue < list[temp = high-      0x40]) high = temp;
17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  6: if (searchValue < list[temp = high-      0x20]) high = temp;
17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  5: if (searchValue < list[temp = high-      0x10]) high = temp;
17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  4: if (searchValue < list[temp = high-       0x8]) high = temp;
17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  3: if (searchValue < list[temp = high-       0x4]) high = temp;
17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  2: if (searchValue < list[temp = high-       0x2]) high = temp;
17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        case  1: if (searchValue < list[temp = high-       0x1]) high = temp;
17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        }
17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        return high;
17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    }
17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    // For debugging only
17942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    public int len() {
17952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        return len;
17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    }
17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    //----------------------------------------------------------------
17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    //----------------------------------------------------------------
18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains every character
18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the given range.
18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of the range
18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of the range
18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean contains(int start, int end) {
18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < MIN_VALUE || start > MAX_VALUE) {
18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (end < MIN_VALUE || end > MAX_VALUE) {
18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //int i = -1;
18162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //while (true) {
18172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //    if (start < list[++i]) break;
18182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //}
18192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = findCodePoint(start);
18202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((i & 1) != 0 && end < list[i]);
18212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
18242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns <tt>true</tt> if this set contains the given
18252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * multicharacter string.
18262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s string to be checked for containment
18272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return <tt>true</tt> if this set contains the specified string
18282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final boolean contains(CharSequence s) {
18302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp = getSingleCP(s);
18322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (cp < 0) {
18332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return strings.contains(s.toString());
18342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
18352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return contains(cp);
18362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
18402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains all the characters and strings
18412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the given set.
18422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param b set to be checked for containment
18432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
18442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean containsAll(UnicodeSet b) {
18462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The specified set is a subset if all of its pairs are contained in
18472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // this set. This implementation accesses the lists directly for speed.
18482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: this could be faster if size() were cached. But that would affect building speed
18492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // so it needs investigation.
18502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] listB = b.list;
18512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean needA = true;
18522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean needB = true;
18532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int aPtr = 0;
18542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bPtr = 0;
18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int aLen = len - 1;
18562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bLen = b.len - 1;
18572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int startA = 0, startB = 0, limitA = 0, limitB = 0;
18582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (true) {
18592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // double iterations are such a pain...
18602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (needA) {
18612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (aPtr >= aLen) {
18622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // ran out of A. If B is also exhausted, then break;
18632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (needB && bPtr >= bLen) {
18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
18662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
18672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
18682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                startA = list[aPtr++];
18692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limitA = list[aPtr++];
18702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (needB) {
18722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (bPtr >= bLen) {
18732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // ran out of B. Since we got this far, we have an A and we are ok so far
18742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
18752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
18762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                startB = listB[bPtr++];
18772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limitB = listB[bPtr++];
18782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if B doesn't overlap and is greater than A, get new A
18802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (startB >= limitA) {
18812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needA = true;
18822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needB = false;
18832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
18842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if B is wholy contained in A, then get a new B
18862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (startB >= startA && limitB <= limitA) {
18872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needA = false;
18882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needB = true;
18892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
18902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // all other combinations mean we fail
18922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
18932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!strings.containsAll(b.strings)) return false;
18962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
18972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    /**
19002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * Returns true if this set contains all the characters and strings
19012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * of the given set.
19022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * @param c set to be checked for containment
19032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * @return true if the test condition is met
19042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * @stable ICU 2.0
19052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     */
19062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    public boolean containsAllOld(UnicodeSet c) {
19072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // The specified set is a subset if all of its pairs are contained in
19082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // this set.  It's possible to code this more efficiently in terms of
19092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // direct manipulation of the inversion lists if the need arises.
19102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        int n = c.getRangeCount();
19112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        for (int i=0; i<n; ++i) {
19122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) {
19132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                return false;
19142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            }
19152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        }
19162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        if (!strings.containsAll(c.strings)) return false;
19172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        return true;
19182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    }
19192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
19212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if there is a partition of the string such that this set contains each of the partitioned strings.
19222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For example, for the Unicode set [a{bc}{cd}]<br>
19232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * containsAll is true for each of: "a", "bc", ""cdbca"<br>
19242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * containsAll is false for each of: "acb", "bcda", "bcx"<br>
19252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s string containing characters to be checked for containment
19262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
19272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean containsAll(String s) {
19292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
19302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
19312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(s, i);
19322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!contains(cp))  {
19332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (strings.size() == 0) {
19342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
19352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
19362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return containsAll(s, 0);
19372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
19382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
19402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
19412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
19432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Recursive routine called if we fail to find a match in containsAll, and there are strings
19442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s source string
19452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param i point to match to the end on
19462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if ok
19472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean containsAll(String s, int i) {
19492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (i >= s.length()) {
19502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
19512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int  cp= UTF16.charAt(s, i);
19532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (contains(cp) && containsAll(s, i+UTF16.getCharCount(cp))) {
19542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
19552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (String setStr : strings) {
19572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s.startsWith(setStr, i) &&  containsAll(s, i+setStr.length())) {
19582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
19592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
19602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return false;
19622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
19642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
19662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Get the Regex equivalent for this UnicodeSet
19672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return regex pattern equivalent to this UnicodeSet
19682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
196993cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
1970836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
19712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
19732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String getRegexEquivalent() {
19742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (strings.size() == 0) {
19752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return toString();
19762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder("(?:");
19782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        appendNewPattern(result, true, false);
19792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (String s : strings) {
19802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append('|');
19812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            _appendToPat(result, s, true);
19822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.append(")").toString();
19842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
19852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
19872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains none of the characters
19882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the given range.
19892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of the range
19902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of the range
19912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
19922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean containsNone(int start, int end) {
19942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < MIN_VALUE || start > MAX_VALUE) {
19952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
19962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (end < MIN_VALUE || end > MAX_VALUE) {
19982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
19992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = -1;
20012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (true) {
20022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (start < list[++i]) break;
20032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((i & 1) == 0 && end < list[i]);
20052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
20062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
20082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if none of the characters or strings in this UnicodeSet appears in the string.
20092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For example, for the Unicode set [a{bc}{cd}]<br>
20102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * containsNone is true for: "xy", "cb"<br>
20112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * containsNone is false for: "a", "bc", "bcd"<br>
20122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param b set to be checked for containment
20132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
20142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
20152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean containsNone(UnicodeSet b) {
20162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The specified set is a subset if some of its pairs overlap with some of this set's pairs.
20172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // This implementation accesses the lists directly for speed.
20182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] listB = b.list;
20192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean needA = true;
20202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean needB = true;
20212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int aPtr = 0;
20222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bPtr = 0;
20232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int aLen = len - 1;
20242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bLen = b.len - 1;
20252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int startA = 0, startB = 0, limitA = 0, limitB = 0;
20262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (true) {
20272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // double iterations are such a pain...
20282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (needA) {
20292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (aPtr >= aLen) {
20302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // ran out of A: break so we test strings
20312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
20322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                startA = list[aPtr++];
20342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limitA = list[aPtr++];
20352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (needB) {
20372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (bPtr >= bLen) {
20382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // ran out of B: break so we test strings
20392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
20402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                startB = listB[bPtr++];
20422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limitB = listB[bPtr++];
20432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if B is higher than any part of A, get new A
20452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (startB >= limitA) {
20462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needA = true;
20472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needB = false;
20482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
20492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if A is higher than any part of B, get new B
20512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (startA >= limitB) {
20522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needA = false;
20532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                needB = true;
20542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
20552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // all other combinations mean we fail
20572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
20582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!SortedSetRelation.hasRelation(strings, SortedSetRelation.DISJOINT, b.strings)) return false;
20612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
20622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
20632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    /**
20652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * Returns true if none of the characters or strings in this UnicodeSet appears in the string.
20662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * For example, for the Unicode set [a{bc}{cd}]<br>
20672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * containsNone is true for: "xy", "cb"<br>
20682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * containsNone is false for: "a", "bc", "bcd"<br>
20692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * @param c set to be checked for containment
20702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * @return true if the test condition is met
20712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     * @stable ICU 2.0
20722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     */
20732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    public boolean containsNoneOld(UnicodeSet c) {
20742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // The specified set is a subset if all of its pairs are contained in
20752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // this set.  It's possible to code this more efficiently in terms of
20762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        // direct manipulation of the inversion lists if the need arises.
20772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        int n = c.getRangeCount();
20782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        for (int i=0; i<n; ++i) {
20792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
20802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                return false;
20812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //            }
20822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        }
20832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        if (!SortedSetRelation.hasRelation(strings, SortedSetRelation.DISJOINT, c.strings)) return false;
20842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //        return true;
20852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //    }
20862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
20882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains none of the characters
20892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the given string.
20902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s string containing characters to be checked for containment
20912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the test condition is met
20922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
20932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean containsNone(CharSequence s) {
20942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return span(s, SpanCondition.NOT_CONTAINED) == s.length();
20952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
20962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
20982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains one or more of the characters
20992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * in the given range.
21002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start first character, inclusive, of the range
21012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param end last character, inclusive, of the range
21022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the condition is met
21032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final boolean containsSome(int start, int end) {
21052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !containsNone(start, end);
21062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains one or more of the characters
21102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and strings of the given set.
21112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s set to be checked for containment
21122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the condition is met
21132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final boolean containsSome(UnicodeSet s) {
21152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !containsNone(s);
21162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if this set contains one or more of the characters
21202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the given string.
21212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s string containing characters to be checked for containment
21222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if the condition is met
21232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final boolean containsSome(CharSequence s) {
21252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !containsNone(s);
21262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Adds all of the elements in the specified set to this set if
21312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * they're not already present.  This operation effectively
21322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * modifies this set so that its value is the <i>union</i> of the two
21332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * sets.  The behavior of this operation is unspecified if the specified
21342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * collection is modified while the operation is in progress.
21352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
21362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c set whose elements are to be added to this set.
21372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet addAll(UnicodeSet c) {
21392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
21402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        add(c.list, c.len, 0);
21412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        strings.addAll(c.strings);
21422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
21432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Retains only the elements in this set that are contained in the
21472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified set.  In other words, removes from this set all of
21482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * its elements that are not contained in the specified set.  This
21492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * operation effectively modifies this set so that its value is
21502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the <i>intersection</i> of the two sets.
21512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
21522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c set that defines which elements this set will retain.
21532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet retainAll(UnicodeSet c) {
21552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
21562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        retain(c.list, c.len, 0);
21572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        strings.retainAll(c.strings);
21582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
21592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Removes from this set all of its elements that are contained in the
21632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified set.  This operation effectively modifies this
21642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set so that its value is the <i>asymmetric set difference</i> of
21652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the two sets.
21662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
21672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c set that defines which elements will be removed from
21682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *          this set.
21692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet removeAll(UnicodeSet c) {
21712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
21722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        retain(c.list, c.len, 2);
21732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        strings.removeAll(c.strings);
21742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
21752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Complements in this set all elements contained in the specified
21792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set.  Any character in the other set will be removed if it is
21802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * in this set, or will be added if it is not in this set.
21812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
21822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c set that defines which elements will be complemented from
21832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *          this set.
21842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet complementAll(UnicodeSet c) {
21862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
21872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        xor(c.list, c.len, 0);
21882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
21892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
21902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Removes all of the elements from this set.  This set will be
21942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * empty after this call returns.
21952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet clear() {
21972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
21982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list[0] = HIGH;
21992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        len = 1;
22002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = null;
22012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        strings.clear();
22022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
22032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
22062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Iteration method that returns the number of ranges contained in
22072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this set.
22082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getRangeStart
22092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getRangeEnd
22102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
22112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getRangeCount() {
22122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return len/2;
22132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
22162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Iteration method that returns the first character in the
22172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified range of this set.
22182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception ArrayIndexOutOfBoundsException if index is outside
22192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the range <code>0..getRangeCount()-1</code>
22202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getRangeCount
22212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getRangeEnd
22222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
22232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getRangeStart(int index) {
22242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return list[index*2];
22252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
22282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Iteration method that returns the last character in the
22292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified range of this set.
22302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception ArrayIndexOutOfBoundsException if index is outside
22312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the range <code>0..getRangeCount()-1</code>
22322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getRangeStart
22332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getRangeEnd
22342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
22352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getRangeEnd(int index) {
22362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (list[index*2 + 1] - 1);
22372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
22402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Reallocate this objects internal structures to take up the least
22412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * possible space, without changing this object's value.
22422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
22432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet compact() {
22442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
22452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (len != list.length) {
22462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] temp = new int[len];
22472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.arraycopy(list, 0, temp, 0, len);
22482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            list = temp;
22492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        rangeList = null;
22512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer = null;
22522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
22532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
22562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compares the specified object with this set for equality.  Returns
22572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <tt>true</tt> if the specified object is also a set, the two sets
22582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * have the same size, and every member of the specified set is
22592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contained in this set (or equivalently, every member of this set is
22602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * contained in the specified set).
22612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
22622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param o Object to be compared for equality with this set.
22632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return <tt>true</tt> if the specified Object is equal to this set.
22642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2265f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
22662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean equals(Object o) {
22672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (o == null) {
22682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
22692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (this == o) {
22712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
22722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
22742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet that = (UnicodeSet) o;
22752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (len != that.len) return false;
22762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i = 0; i < len; ++i) {
22772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (list[i] != that.list[i]) return false;
22782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
22792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!strings.equals(that.strings)) return false;
22802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (Exception e) {
22812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
22822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
22842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
22872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the hash code value for this set.
22882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
22892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the hash code value for this set.
22902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see java.lang.Object#hashCode()
22912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2292f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
22932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int hashCode() {
22942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int result = len;
22952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < len; ++i) {
22962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result *= 1000003;
22972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result += list[i];
22982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
23002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
23012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
23032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return a programmer-readable string representation of this object.
23042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2305f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
23062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String toString() {
23072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return toPattern(true);
23082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
23092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
23112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Implementation: Pattern parsing
23122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
23132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
23152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parses the given pattern, starting at the given position.  The character
23162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * at pattern.charAt(pos.getIndex()) must be '[', or the parse fails.
23172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parsing continues until the corresponding closing ']'.  If a syntax error
23182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is encountered between the opening and closing brace, the parse fails.
23192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Upon return from a successful parse, the ParsePosition is updated to
23202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * point to the character following the closing ']', and an inversion
23212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * list for the parsed pattern is returned.  This method
23222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * calls itself recursively to parse embedded subpatterns.
23232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
23242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern the string containing the pattern to be parsed.  The
23252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * portion of the string from pos.getIndex(), which must be a '[', to the
23262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * corresponding closing ']', is parsed.
23272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos upon entry, the position at which to being parsing.  The
23282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character at pattern.charAt(pos.getIndex()) must be a '['.  Upon return
23292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * from a successful parse, pos.getIndex() is either the character after the
23302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * closing ']' of the parsed pattern, or pattern.length() if the closing ']'
23312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is the last character of the pattern string.
23322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return an inversion list for the parsed substring
23332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of <code>pattern</code>
23342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception java.lang.IllegalArgumentException if the parse fails.
23352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
233693cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
2337836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
23382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
23392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
23402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet applyPattern(String pattern,
23412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ParsePosition pos,
23422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            SymbolTable symbols,
23432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int options) {
23442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Need to build the pattern in a temporary string because
23462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // _applyPattern calls add() etc., which set pat to empty.
23472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean parsePositionWasNull = pos == null;
23482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (parsePositionWasNull) {
23492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos = new ParsePosition(0);
23502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
23512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder rebuiltPat = new StringBuilder();
23532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RuleCharacterIterator chars =
23542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                new RuleCharacterIterator(pattern, symbols, pos);
23552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPattern(chars, symbols, rebuiltPat, options);
23562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (chars.inVariable()) {
23572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            syntaxError(chars, "Extra chars in variable value");
23582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
23592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = rebuiltPat.toString();
23602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (parsePositionWasNull) {
23612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int i = pos.getIndex();
23622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Skip over trailing whitespace
23642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if ((options & IGNORE_SPACE) != 0) {
23652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i = PatternProps.skipWhiteSpace(pattern, i);
23662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
23672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (i != pattern.length()) {
23692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("Parse of \"" + pattern +
23702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "\" failed at " + i);
23712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
23722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
23732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
23742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
23752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Add constants to make the applyPattern() code easier to follow.
23772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2378f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    private static final int LAST0_START = 0,
2379f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            LAST1_RANGE = 1,
23802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            LAST2_SET = 2;
23812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2382f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    private static final int MODE0_NONE = 0,
2383f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            MODE1_INBRACKET = 1,
23842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            MODE2_OUTBRACKET = 2;
23852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2386f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    private static final int SETMODE0_NONE = 0,
2387f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            SETMODE1_UNICODESET = 1,
2388f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            SETMODE2_PROPERTYPAT = 2,
23892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            SETMODE3_PREPARSED = 3;
23902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
23912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
23922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse the pattern from the given RuleCharacterIterator.  The
23932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * iterator is advanced over the parsed pattern.
23942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param chars iterator over the pattern characters.  Upon return
23952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * it will be advanced to the first character after the parsed
23962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pattern, or the end of the iteration if all characters are
23972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * parsed.
23982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param symbols symbol table to use to parse and dereference
23992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * variables, or null if none.
24002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param rebuiltPat the pattern that was parsed, rebuilt or
24012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * copied from the input pattern, as appropriate.
24022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param options a bit mask of zero or more of the following:
24032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * IGNORE_SPACE, CASE.
24042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
24052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
24062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Appendable rebuiltPat, int options) {
24072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Syntax characters: [ ] ^ - & { }
24092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Recognized special forms for chars, sets: c-c s-s s&s
24112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int opts = RuleCharacterIterator.PARSE_VARIABLES |
24132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                RuleCharacterIterator.PARSE_ESCAPES;
24142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((options & IGNORE_SPACE) != 0) {
24152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            opts |= RuleCharacterIterator.SKIP_WHITESPACE;
24162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
24172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder patBuf = new StringBuilder(), buf = null;
24192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean usePat = false;
24202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet scratch = null;
24212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Object backup = null;
24222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // mode: 0=before [, 1=between [...], 2=after ]
24242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // lastItem: 0=none, 1=char, 2=set
24252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lastItem = LAST0_START, lastChar = 0, mode = MODE0_NONE;
24262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char op = 0;
24272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean invert = false;
24292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        clear();
24312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String lastString = null;
24322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (mode != MODE2_OUTBRACKET && !chars.atEnd()) {
24342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //Eclipse stated the following is "dead code"
24352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /*
24362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (false) {
24372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Debugging assertion
24382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (!((lastItem == 0 && op == 0) ||
24392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        (lastItem == 1 && (op == 0 || op == '-')) ||
24402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        (lastItem == 2 && (op == 0 || op == '-' || op == '&')))) {
24412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new IllegalArgumentException();
24422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
24432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }*/
24442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = 0;
24462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean literal = false;
24472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet nested = null;
24482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // -------- Check for property pattern
24502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
24522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int setMode = SETMODE0_NONE;
24532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (resemblesPropertyPattern(chars, opts)) {
24542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setMode = SETMODE2_PROPERTYPAT;
24552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
24562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // -------- Parse '[' of opening delimiter OR nested set.
24582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If there is a nested set, use `setMode' to define how
24592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the set should be parsed.  If the '[' is part of the
24602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // opening delimiter for this pattern, parse special
24612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // strings "[", "[^", "[-", and "[^-".  Check for stand-in
24622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // characters representing a nested set in the symbol
24632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // table.
24642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
24662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Prepare to backup if necessary
24672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                backup = chars.getPos(backup);
24682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = chars.next(opts);
24692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                literal = chars.isEscaped();
24702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == '[' && !literal) {
24722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (mode == MODE1_INBRACKET) {
24732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        chars.setPos(backup); // backup
24742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setMode = SETMODE1_UNICODESET;
24752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
24762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Handle opening '[' delimiter
24772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        mode = MODE1_INBRACKET;
24782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        patBuf.append('[');
24792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        backup = chars.getPos(backup); // prepare to backup
24802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c = chars.next(opts);
24812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        literal = chars.isEscaped();
24822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (c == '^' && !literal) {
24832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            invert = true;
24842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            patBuf.append('^');
24852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            backup = chars.getPos(backup); // prepare to backup
24862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c = chars.next(opts);
24872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            literal = chars.isEscaped();
24882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
24892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Fall through to handle special leading '-';
24902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // otherwise restart loop for nested [], \p{}, etc.
24912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (c == '-') {
24922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            literal = true;
24932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Fall through to handle literal '-' below
24942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
24952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            chars.setPos(backup); // backup
24962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            continue;
24972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
24982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
24992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (symbols != null) {
25002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    UnicodeMatcher m = symbols.lookupMatcher(c); // may be null
25012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (m != null) {
25022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        try {
25032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            nested = (UnicodeSet) m;
25042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            setMode = SETMODE3_PREPARSED;
25052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } catch (ClassCastException e) {
25062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            syntaxError(chars, "Syntax error");
25072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
25082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
25092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
25112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // -------- Handle a nested set.  This either is inline in
25132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the pattern or represented by a stand-in that has
25142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // previously been parsed and was looked up in the symbol
25152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // table.
25162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (setMode != SETMODE0_NONE) {
25182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (lastItem == LAST1_RANGE) {
25192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (op != 0) {
25202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        syntaxError(chars, "Char expected after operator");
25212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
25222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    add_unchecked(lastChar, lastChar);
25232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(patBuf, lastChar, false);
25242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastItem = LAST0_START;
25252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    op = 0;
25262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (op == '-' || op == '&') {
25292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    patBuf.append(op);
25302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (nested == null) {
25332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (scratch == null) scratch = new UnicodeSet();
25342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    nested = scratch;
25352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (setMode) {
25372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case SETMODE1_UNICODESET:
25382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    nested.applyPattern(chars, symbols, patBuf, options);
25392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case SETMODE2_PROPERTYPAT:
25412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    chars.skipIgnored(opts);
25422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    nested.applyPropertyPattern(chars, patBuf, symbols);
25432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case SETMODE3_PREPARSED: // `nested' already parsed
25452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    nested._toPattern(patBuf, false);
25462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                usePat = true;
25502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (mode == MODE0_NONE) {
25522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Entire pattern is a category; leave parse loop
25532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    set(nested);
25542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    mode = MODE2_OUTBRACKET;
25552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (op) {
25592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case '-':
25602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    removeAll(nested);
25612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case '&':
25632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    retainAll(nested);
25642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 0:
25662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    addAll(nested);
25672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
25682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
25692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                op = 0;
25712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastItem = LAST2_SET;
25722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
25742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
25752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (mode == MODE0_NONE) {
25772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                syntaxError(chars, "Missing '['");
25782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
25792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // -------- Parse special (syntax) characters.  If the
25812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // current character is not special, or if it is escaped,
25822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // then fall through and handle it below.
25832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
25842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!literal) {
25852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (c) {
25862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case ']':
25872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (lastItem == LAST1_RANGE) {
25882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        add_unchecked(lastChar, lastChar);
25892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        _appendToPat(patBuf, lastChar, false);
25902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
25912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Treat final trailing '-' as a literal
25922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (op == '-') {
25932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        add_unchecked(op, op);
25942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        patBuf.append(op);
25952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (op == '&') {
25962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        syntaxError(chars, "Trailing '&'");
25972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
25982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    patBuf.append(']');
25992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    mode = MODE2_OUTBRACKET;
26002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
26012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case '-':
26022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (op == 0) {
26032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (lastItem != LAST0_START) {
26042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            op = (char) c;
26052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            continue;
26062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else if (lastString != null) {
26072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            op = (char) c;
26082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            continue;
26092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
26102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Treat final trailing '-' as a literal
26112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            add_unchecked(c, c);
26122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c = chars.next(opts);
26132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            literal = chars.isEscaped();
26142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if (c == ']' && !literal) {
26152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                patBuf.append("-]");
26162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                mode = MODE2_OUTBRACKET;
26172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                continue;
26182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
26192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
26202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    syntaxError(chars, "'-' not after char, string, or set");
26222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
26232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case '&':
26242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (lastItem == LAST2_SET && op == 0) {
26252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        op = (char) c;
26262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        continue;
26272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    syntaxError(chars, "'&' not after set");
26292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
26302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case '^':
26312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    syntaxError(chars, "'^' not after '['");
26322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
26332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case '{':
26342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (op != 0 && op != '-') {
26352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        syntaxError(chars, "Missing operand after operator");
26362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (lastItem == LAST1_RANGE) {
26382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        add_unchecked(lastChar, lastChar);
26392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        _appendToPat(patBuf, lastChar, false);
26402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastItem = LAST0_START;
26422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (buf == null) {
26432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buf = new StringBuilder();
26442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
26452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buf.setLength(0);
26462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    boolean ok = false;
26482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    while (!chars.atEnd()) {
26492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c = chars.next(opts);
26502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        literal = chars.isEscaped();
26512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (c == '}' && !literal) {
26522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            ok = true;
26532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
26542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
26552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        appendCodePoint(buf, c);
26562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (buf.length() < 1 || !ok) {
26582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        syntaxError(chars, "Invalid multicharacter string");
26592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We have new string. Add it to set and continue;
26612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // we don't need to drop through to the further
26622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // processing
26632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String curString = buf.toString();
26642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (op == '-') {
26652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int lastSingle = CharSequences.getSingleCodePoint(lastString == null ? "" : lastString);
26662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int curSingle = CharSequences.getSingleCodePoint(curString);
26672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (lastSingle != Integer.MAX_VALUE && curSingle != Integer.MAX_VALUE) {
26682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            add(lastSingle,curSingle);
26692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
26702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            try {
26712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                StringRange.expand(lastString, curString, true, strings);
26722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            } catch (Exception e) {
26732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                syntaxError(chars, e.getMessage());
26742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
26752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
26762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        lastString = null;
26772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        op = 0;
26782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
26792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        add(curString);
26802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        lastString = curString;
26812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
26822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    patBuf.append('{');
26832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(patBuf, curString, false);
26842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    patBuf.append('}');
26852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
26862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case SymbolTable.SYMBOL_REF:
26872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //         symbols  nosymbols
26882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // [a-$]   error    error (ambiguous)
26892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // [a$]    anchor   anchor
26902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // [a-$x]  var "x"* literal '$'
26912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // [a-$.]  error    literal '$'
26922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // *We won't get here in the case of var "x"
26932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    backup = chars.getPos(backup);
26942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c = chars.next(opts);
26952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    literal = chars.isEscaped();
26962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    boolean anchor = (c == ']' && !literal);
26972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (symbols == null && !anchor) {
26982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c = SymbolTable.SYMBOL_REF;
26992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        chars.setPos(backup);
27002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break; // literal '$'
27012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
27022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (anchor && op == 0) {
27032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (lastItem == LAST1_RANGE) {
27042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            add_unchecked(lastChar, lastChar);
27052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            _appendToPat(patBuf, lastChar, false);
27062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
27072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        add_unchecked(UnicodeMatcher.ETHER);
27082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        usePat = true;
27092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        patBuf.append(SymbolTable.SYMBOL_REF).append(']');
27102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        mode = MODE2_OUTBRACKET;
27112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        continue;
27122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
27132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    syntaxError(chars, "Unquoted '$'");
27142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
27152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                default:
27162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
27172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
27182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
27192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // -------- Parse literal characters.  This includes both
27212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // escaped chars ("\u4E01") and non-syntax characters
27222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // ("a").
27232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch (lastItem) {
27252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case LAST0_START:
27262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (op == '-' && lastString != null) {
27272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    syntaxError(chars, "Invalid range");
27282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
27292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastItem = LAST1_RANGE;
27302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastChar = c;
27312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastString = null;
27322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
27332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case LAST1_RANGE:
27342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (op == '-') {
27352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (lastString != null) {
27362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        syntaxError(chars, "Invalid range");
27372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
27382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (lastChar >= c) {
27392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Don't allow redundant (a-a) or empty (b-a) ranges;
27402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // these are most likely typos.
27412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        syntaxError(chars, "Invalid range");
27422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
27432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    add_unchecked(lastChar, c);
27442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(patBuf, lastChar, false);
27452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    patBuf.append(op);
27462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(patBuf, c, false);
27472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastItem = LAST0_START;
27482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    op = 0;
27492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
27502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    add_unchecked(lastChar, lastChar);
27512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    _appendToPat(patBuf, lastChar, false);
27522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastChar = c;
27532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
27542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
27552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case LAST2_SET:
27562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (op != 0) {
27572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    syntaxError(chars, "Set expected after operator");
27582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
27592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastChar = c;
27602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastItem = LAST1_RANGE;
27612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
27622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
27632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
27642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (mode != MODE2_OUTBRACKET) {
27662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            syntaxError(chars, "Missing ']'");
27672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
27682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        chars.skipIgnored(opts);
27702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
27722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Handle global flags (invert, case insensitivity).  If this
27732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * pattern should be compiled case-insensitive, then we need
27742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * to close over case BEFORE COMPLEMENTING.  This makes
27752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * patterns like /[^abc]/i work.
27762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
27772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((options & CASE) != 0) {
27782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            closeOver(CASE);
27792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
27802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (invert) {
27812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            complement();
27822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
27832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Use the rebuilt pattern (pat) only if necessary.  Prefer the
27852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // generated pattern.
27862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (usePat) {
27872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            append(rebuiltPat, patBuf.toString());
27882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
27892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendNewPattern(rebuiltPat, false, true);
27902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
27912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
27922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static void syntaxError(RuleCharacterIterator chars, String msg) {
27942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        throw new IllegalArgumentException("Error: " + msg + " at \"" +
27952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Utility.escape(chars.toString()) +
27962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                '"');
27972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
27982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
27992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
28002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Add the contents of the UnicodeSet (as strings) into a collection.
28012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param target collection to add into
28022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
28032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public <T extends Collection<String>> T addAllTo(T target) {
28042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return addAllTo(this, target);
28052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
28092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Add the contents of the UnicodeSet (as strings) into a collection.
28102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param target collection to add into
281139fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
28122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
28132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String[] addAllTo(String[] target) {
28142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return addAllTo(this, target);
28152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
28182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Add the contents of the UnicodeSet (as strings) into an array.
281939fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
28202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
28212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] toArray(UnicodeSet set) {
28222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return addAllTo(set, new String[set.size()]);
28232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2826f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * Add the contents of the collection (as strings) into this UnicodeSet.
28272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The collection must not contain null.
28282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source the collection to add
28292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to this object
28302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
28312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet add(Iterable<?> source) {
28322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return addAll(source);
28332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
28362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Add a collection (as strings) into this UnicodeSet.
28372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Uses standard naming convention.
28382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source collection to add into
28392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to this object
28402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
28412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet addAll(Iterable<?> source) {
28422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
28432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (Object o : source) {
28442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add(o.toString());
28452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
28462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
28472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
28502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Implementation: Utility methods
28512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
28522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void ensureCapacity(int newLen) {
28542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (newLen <= list.length) return;
2855f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int[] temp = new int[newLen + GROW_EXTRA];
28562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.arraycopy(list, 0, temp, 0, len);
28572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = temp;
28582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void ensureBufferCapacity(int newLen) {
28612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (buffer != null && newLen <= buffer.length) return;
28622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer = new int[newLen + GROW_EXTRA];
28632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
28662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Assumes start <= end.
28672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
28682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] range(int start, int end) {
28692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (rangeList == null) {
28702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rangeList = new int[] { start, end+1, HIGH };
28712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
28722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rangeList[0] = start;
28732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rangeList[1] = end+1;
28742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
28752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return rangeList;
28762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
28772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
28792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Implementation: Fundamental operations
28802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
28812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 0, 3 is normal: x xor y
28832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 1, 2: x xor ~y == x === y
28842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
28852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet xor(int[] other, int otherLen, int polarity) {
28862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ensureBufferCapacity(len + otherLen);
28872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0, j = 0, k = 0;
28882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int a = list[i++];
28892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int b;
28902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: Based on the call hierarchy, polarity of 1 or 2 is never used
28912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //      so the following if statement will not be called.
28922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ///CLOVER:OFF
28932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (polarity == 1 || polarity == 2) {
28942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            b = LOW;
28952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (other[j] == LOW) { // skip base if already LOW
28962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++j;
28972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = other[j];
28982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
28992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ///CLOVER:ON
29002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
29012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            b = other[j++];
29022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
29032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // simplest of all the routines
29042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // sort the values, discarding identicals!
29052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (true) {
29062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (a < b) {
29072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer[k++] = a;
29082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                a = list[i++];
29092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (b < a) {
29102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer[k++] = b;
29112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = other[j++];
29122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (a != HIGH) { // at this point, a == b
29132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // discard both values!
29142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                a = list[i++];
29152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = other[j++];
29162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else { // DONE!
29172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer[k++] = HIGH;
29182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                len = k;
29192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
29202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
29212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
29222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // swap list and buffer
29232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] temp = list;
29242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = buffer;
29252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer = temp;
29262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = null;
29272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
29282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
29292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
29302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 0 is normal: x union y
29312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 2: x union ~y
29322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 1: ~x union y
29332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 3: ~x union ~y
29342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
29352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet add(int[] other, int otherLen, int polarity) {
29362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ensureBufferCapacity(len + otherLen);
29372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0, j = 0, k = 0;
29382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int a = list[i++];
29392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int b = other[j++];
29402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // change from xor is that we have to check overlapping pairs
29412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // polarity bit 1 means a is second, bit 2 means b is.
29422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        main:
29432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (true) {
29442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (polarity) {
29452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 0: // both first; take lower if unequal
29462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (a < b) { // take a
29472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Back up over overlapping ranges in buffer[]
29482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (k > 0 && a <= buffer[k-1]) {
29492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Pick latter end value in buffer[] vs. list[]
29502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            a = max(list[i], buffer[--k]);
29512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
29522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // No overlap
29532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            buffer[k++] = a;
29542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            a = list[i];
29552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
29562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        i++; // Common if/else code factored out
29572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        polarity ^= 1;
29582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (b < a) { // take b
29592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (k > 0 && b <= buffer[k-1]) {
29602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            b = max(other[j], buffer[--k]);
29612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
29622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            buffer[k++] = b;
29632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            b = other[j];
29642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
29652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        j++;
29662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        polarity ^= 2;
29672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, take a, drop b
29682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
29692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // This is symmetrical; it doesn't matter if
29702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // we backtrack with a or b. - liu
29712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (k > 0 && a <= buffer[k-1]) {
29722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            a = max(list[i], buffer[--k]);
29732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
29742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // No overlap
29752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            buffer[k++] = a;
29762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            a = list[i];
29772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
29782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        i++;
29792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        polarity ^= 1;
29802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
29812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
29822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
29832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 3: // both second; take higher if unequal, and drop other
29842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (b <= a) { // take a
29852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
29862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = a;
29872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // take b
29882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (b == HIGH) break main;
29892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = b;
29902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
29912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    a = list[i++]; polarity ^= 1;   // factored common code
29922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    b = other[j++]; polarity ^= 2;
29932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
29942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 1: // a second, b first; if b < a, overlap
29952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (a < b) { // no overlap, take a
29962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = a; a = list[i++]; polarity ^= 1;
29972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (b < a) { // OVERLAP, drop b
29982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
29992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, drop both!
30002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
30012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
30042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
30052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 2: // a first, b second; if a < b, overlap
30062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (b < a) { // no overlap, take b
30072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = b; b = other[j++]; polarity ^= 2;
30082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else  if (a < b) { // OVERLAP, drop a
30092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, drop both!
30112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
30122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
30152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
30162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
30172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
30182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer[k++] = HIGH;    // terminate
30192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        len = k;
30202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // swap list and buffer
30212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] temp = list;
30222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = buffer;
30232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer = temp;
30242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = null;
30252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
30262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
30272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
30282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 0 is normal: x intersect y
30292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 2: x intersect ~y == set-minus
30302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 1: ~x intersect y
30312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // polarity = 3: ~x intersect ~y
30322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
30332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet retain(int[] other, int otherLen, int polarity) {
30342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ensureBufferCapacity(len + otherLen);
30352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0, j = 0, k = 0;
30362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int a = list[i++];
30372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int b = other[j++];
30382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // change from xor is that we have to check overlapping pairs
30392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // polarity bit 1 means a is second, bit 2 means b is.
30402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        main:
30412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (true) {
30422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (polarity) {
30432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 0: // both first; drop the smaller
30442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (a < b) { // drop a
30452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (b < a) { // drop b
30472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, take one, drop other
30492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
30502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = a; a = list[i++]; polarity ^= 1;
30512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
30532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
30542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 3: // both second; take lower if unequal
30552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (a < b) { // take a
30562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = a; a = list[i++]; polarity ^= 1;
30572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (b < a) { // take b
30582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = b; b = other[j++]; polarity ^= 2;
30592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, take one, drop other
30602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
30612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = a; a = list[i++]; polarity ^= 1;
30622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
30642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
30652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 1: // a second, b first;
30662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (a < b) { // NO OVERLAP, drop a
30672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (b < a) { // OVERLAP, take b
30692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = b; b = other[j++]; polarity ^= 2;
30702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, drop both!
30712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
30722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
30752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
30762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case 2: // a first, b second; if a < b, overlap
30772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (b < a) { // no overlap, drop b
30782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else  if (a < b) { // OVERLAP, take a
30802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer[k++] = a; a = list[i++]; polarity ^= 1;
30812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else { // a == b, drop both!
30822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (a == HIGH) break main;
30832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        a = list[i++]; polarity ^= 1;
30842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        b = other[j++]; polarity ^= 2;
30852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
30862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
30872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
30882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
30892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer[k++] = HIGH;    // terminate
30902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        len = k;
30912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // swap list and buffer
30922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] temp = list;
30932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        list = buffer;
30942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer = temp;
30952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pat = null;
30962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
30972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
30982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
30992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int max(int a, int b) {
31002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (a > b) ? a : b;
31012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
31042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Generic filter-based scanning code
31052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
31062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static interface Filter {
31082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean contains(int codePoint);
31092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class NumericValueFilter implements Filter {
31122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        double value;
31132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        NumericValueFilter(double value) { this.value = value; }
3114f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
31152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean contains(int ch) {
31162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCharacter.getUnicodeNumericValue(ch) == value;
31172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
31182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class GeneralCategoryMaskFilter implements Filter {
31212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int mask;
31222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
3123f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
31242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean contains(int ch) {
31252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return ((1 << UCharacter.getType(ch)) & mask) != 0;
31262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
31272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class IntPropertyFilter implements Filter {
31302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prop;
31312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int value;
31322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        IntPropertyFilter(int prop, int value) {
31332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            this.prop = prop;
31342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            this.value = value;
31352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3136f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
31372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean contains(int ch) {
31382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCharacter.getIntPropertyValue(ch, prop) == value;
31392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
31402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class ScriptExtensionsFilter implements Filter {
31432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int script;
31442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ScriptExtensionsFilter(int script) { this.script = script; }
3145f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
31462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean contains(int c) {
31472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UScript.hasScript(c, script);
31482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
31492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // VersionInfo for unassigned characters
31522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
31532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class VersionFilter implements Filter {
31552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        VersionInfo version;
31562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        VersionFilter(VersionInfo version) { this.version = version; }
3157f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
31582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean contains(int ch) {
31592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            VersionInfo v = UCharacter.getAge(ch);
31602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Reference comparison ok; VersionInfo caches and reuses
31612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // unique objects.
3162f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            return !Utility.sameObjects(v, NO_VERSION) &&
31632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    v.compareTo(version) <= 0;
31642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
31652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
31662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
31672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static synchronized UnicodeSet getInclusions(int src) {
31682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (INCLUSIONS == null) {
31692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT];
31702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
31712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(INCLUSIONS[src] == null) {
31722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet incl = new UnicodeSet();
31732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch(src) {
31742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_CHAR:
31752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UCharacterProperty.INSTANCE.addPropertyStarts(incl);
31762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_PROPSVEC:
31782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
31792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
31812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UCharacterProperty.INSTANCE.addPropertyStarts(incl);
31822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
31832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_CASE_AND_NORM:
31852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
31862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UCaseProps.INSTANCE.addPropertyStarts(incl);
31872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_NFC:
31892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
31902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_NFKC:
31922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(incl);
31932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_NFKC_CF:
31952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(incl);
31962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
31972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_NFC_CANON_ITER:
31982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(incl);
31992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
32002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_CASE:
32012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UCaseProps.INSTANCE.addPropertyStarts(incl);
32022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
32032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case UCharacterProperty.SRC_BIDI:
32042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UBiDiProps.INSTANCE.addPropertyStarts(incl);
32052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
32062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            default:
32072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
32082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
32092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            INCLUSIONS[src] = incl;
32102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
32112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return INCLUSIONS[src];
32122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
32132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
32152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Generic filter-based scanning code for UCD property UnicodeSets.
32162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
32172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet applyFilter(Filter filter, int src) {
32182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Logically, walk through all Unicode characters, noting the start
32192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and end of each range for which filter.contain(c) is
32202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // true.  Add each range to a set.
32212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
32222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // To improve performance, use an inclusions set which
32232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // encodes information about character ranges that are known
32242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // to have identical properties.
32252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // getInclusions(src) contains exactly the first characters of
32262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // same-value ranges for the given properties "source".
32272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        clear();
32292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int startHasProperty = -1;
32312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet inclusions = getInclusions(src);
32322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limitRange = inclusions.getRangeCount();
32332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int j=0; j<limitRange; ++j) {
32352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // get current range
32362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int start = inclusions.getRangeStart(j);
32372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int end = inclusions.getRangeEnd(j);
32382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // for all the code points in the range, process
32402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int ch = start; ch <= end; ++ch) {
32412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // only add to the unicodeset on inflection points --
32422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // where the hasProperty value changes to false
32432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (filter.contains(ch)) {
32442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (startHasProperty < 0) {
32452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        startHasProperty = ch;
32462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
32472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (startHasProperty >= 0) {
32482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    add_unchecked(startHasProperty, ch-1);
32492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    startHasProperty = -1;
32502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
32512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
32522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
32532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (startHasProperty >= 0) {
32542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add_unchecked(startHasProperty, 0x10FFFF);
32552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
32562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
32582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
32592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
32622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Remove leading and trailing Pattern_White_Space and compress
32632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * internal Pattern_White_Space to a single space character.
32642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
32652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static String mungeCharName(String source) {
32662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        source = PatternProps.trimWhiteSpace(source);
32672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = null;
32682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<source.length(); ++i) {
32692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char ch = source.charAt(i);
32702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (PatternProps.isWhiteSpace(ch)) {
32712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (buf == null) {
32722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf = new StringBuilder().append(source, 0, i);
32732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (buf.charAt(buf.length() - 1) == ' ') {
32742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
32752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
32762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ch = ' '; // convert to ' '
32772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
32782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (buf != null) {
32792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(ch);
32802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
32812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
32822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf == null ? source : buf.toString();
32832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
32842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
32862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Property set API
32872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
32882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
32892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
32902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Modifies this set to contain those code points which have the
32912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * given value for the given binary or enumerated property, as
32922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returned by UCharacter.getIntPropertyValue.  Prior contents of
32932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this set are lost.
32942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
32952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param prop a property in the range
32962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UProperty.BIN_START..UProperty.BIN_LIMIT-1 or
32972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UProperty.INT_START..UProperty.INT_LIMIT-1 or.
32982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UProperty.MASK_START..UProperty.MASK_LIMIT-1.
32992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
33002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param value a value in the range
33012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UCharacter.getIntPropertyMinValue(prop)..
33022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UCharacter.getIntPropertyMaxValue(prop), with one exception.
33032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If prop is UProperty.GENERAL_CATEGORY_MASK, then value should not be
33042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a UCharacter.getType() result, but rather a mask value produced
3305bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * by logically ORing (1 &lt;&lt; UCharacter.getType()) values together.
33062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This allows grouped categories such as [:L:] to be represented.
33072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
33082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to this set
33092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
33102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet applyIntPropertyValue(int prop, int value) {
33112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
33122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (prop == UProperty.GENERAL_CATEGORY_MASK) {
33132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
33142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (prop == UProperty.SCRIPT_EXTENSIONS) {
33152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            applyFilter(new ScriptExtensionsFilter(value), UCharacterProperty.SRC_PROPSVEC);
33162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
33172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            applyFilter(new IntPropertyFilter(prop, value), UCharacterProperty.INSTANCE.getSource(prop));
33182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
33192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
33202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
33212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
33252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Modifies this set to contain those code points which have the
33262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * given value for the given property.  Prior contents of this
33272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set are lost.
33282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
33292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param propertyAlias a property alias, either short or long.
33302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The name is matched loosely.  See PropertyAliases.txt for names
33312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and a description of loose matching.  If the value string is
33322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * empty, then this string is interpreted as either a
33332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * General_Category value alias, a Script value alias, a binary
33342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * property alias, or a special ID.  Special IDs are matched
33352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * loosely and correspond to the following sets:
33362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
333706ec6d9c5ca6a2e72ac506c8729e0a31db19e211Andrew Solovay     * "ANY" = [\\u0000-\\u0010FFFF],
333806ec6d9c5ca6a2e72ac506c8729e0a31db19e211Andrew Solovay     * "ASCII" = [\\u0000-\\u007F].
33392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
33402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param valueAlias a value alias, either short or long.  The
33412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * name is matched loosely.  See PropertyValueAliases.txt for
33422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * names and a description of loose matching.  In addition to
33432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * aliases listed, numeric values and canonical combining classes
33442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * may be expressed numerically, e.g., ("nv", "0.5") or ("ccc",
33452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * "220").  The value string may also be empty.
33462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
33472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to this set
33482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
33492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet applyPropertyAlias(String propertyAlias, String valueAlias) {
33502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return applyPropertyAlias(propertyAlias, valueAlias, null);
33512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
33522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
33542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Modifies this set to contain those code points which have the
33552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * given value for the given property.  Prior contents of this
33562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set are lost.
33572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param propertyAlias A string of the property alias.
33582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param valueAlias A string of the value alias.
33592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param symbols if not null, then symbols are first called to see if a property
33602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is available. If true, then everything else is skipped.
33612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this set
33622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
33632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet applyPropertyAlias(String propertyAlias,
33642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String valueAlias, SymbolTable symbols) {
33652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
33662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p;
33672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int v;
33682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean mustNotBeEmpty = false, invert = false;
33692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (symbols != null
33712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && (symbols instanceof XSymbolTable)
33722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && ((XSymbolTable)symbols).applyPropertyAlias(propertyAlias, valueAlias, this)) {
33732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
33742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
33752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (XSYMBOL_TABLE != null) {
33772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (XSYMBOL_TABLE.applyPropertyAlias(propertyAlias, valueAlias, this)) {
33782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return this;
33792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
33802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
33812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (valueAlias.length() > 0) {
33832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p = UCharacter.getPropertyEnum(propertyAlias);
33842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Treat gc as gcm
33862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (p == UProperty.GENERAL_CATEGORY) {
33872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                p = UProperty.GENERAL_CATEGORY_MASK;
33882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
33892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
33902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if ((p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT) ||
33912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (p >= UProperty.INT_START && p < UProperty.INT_LIMIT) ||
33922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (p >= UProperty.MASK_START && p < UProperty.MASK_LIMIT)) {
33932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
33942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    v = UCharacter.getPropertyValueEnum(p, valueAlias);
33952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch (IllegalArgumentException e) {
33962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Handle numeric CCC
33972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (p == UProperty.CANONICAL_COMBINING_CLASS ||
33982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            p == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
33992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) {
34002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        v = Integer.parseInt(PatternProps.trimWhiteSpace(valueAlias));
34012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // If the resultant set is empty then the numeric value
34022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // was invalid.
34032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        //mustNotBeEmpty = true;
34042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // old code was wrong; anything between 0 and 255 is valid even if unused.
34052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (v < 0 || v > 255) throw e;
34062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
34072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        throw e;
34082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
34092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
34102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
34112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
34122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
34132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (p) {
34142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UProperty.NUMERIC_VALUE:
34152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                {
34162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    double value = Double.parseDouble(PatternProps.trimWhiteSpace(valueAlias));
34172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    applyFilter(new NumericValueFilter(value), UCharacterProperty.SRC_CHAR);
34182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return this;
34192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
34202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UProperty.NAME:
34212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                {
34222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Must munge name, since
34232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // UCharacter.charFromName() does not do
34242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // 'loose' matching.
34252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String buf = mungeCharName(valueAlias);
34262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int ch = UCharacter.getCharFromExtendedName(buf);
34272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (ch == -1) {
34282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        throw new IllegalArgumentException("Invalid character name");
34292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
34302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    clear();
34312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    add_unchecked(ch);
34322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return this;
34332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
34342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UProperty.UNICODE_1_NAME:
34352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // ICU 49 deprecates the Unicode_1_Name property APIs.
34362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new IllegalArgumentException("Unicode_1_Name (na1) not supported");
34372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UProperty.AGE:
34382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                {
34392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Must munge name, since
34402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // VersionInfo.getInstance() does not do
34412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // 'loose' matching.
34422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
34432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
34442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return this;
34452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
34462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UProperty.SCRIPT_EXTENSIONS:
34472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    v = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, valueAlias);
34482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // fall through to calling applyIntPropertyValue()
34492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
34502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                default:
34512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // p is a non-binary, non-enumerated property that we
34522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // don't support (yet).
34532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new IllegalArgumentException("Unsupported property");
34542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
34552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
34562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
34572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
34582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
34592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // valueAlias is empty.  Interpret as General Category, Script,
34602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Binary property, or ANY or ASCII.  Upon success, p and v will
34612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // be set.
34622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UPropertyAliases pnames = UPropertyAliases.INSTANCE;
34632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p = UProperty.GENERAL_CATEGORY_MASK;
34642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            v = pnames.getPropertyValueEnum(p, propertyAlias);
34652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (v == UProperty.UNDEFINED) {
34662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                p = UProperty.SCRIPT;
34672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                v = pnames.getPropertyValueEnum(p, propertyAlias);
34682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (v == UProperty.UNDEFINED) {
34692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    p = pnames.getPropertyEnum(propertyAlias);
34702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (p == UProperty.UNDEFINED) {
34712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        p = -1;
34722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
34732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT) {
34742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        v = 1;
34752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (p == -1) {
34762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (0 == UPropertyAliases.compare(ANY_ID, propertyAlias)) {
34772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            set(MIN_VALUE, MAX_VALUE);
34782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return this;
34792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else if (0 == UPropertyAliases.compare(ASCII_ID, propertyAlias)) {
34802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            set(0, 0x7F);
34812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return this;
34822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else if (0 == UPropertyAliases.compare(ASSIGNED, propertyAlias)) {
34832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // [:Assigned:]=[:^Cn:]
34842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            p = UProperty.GENERAL_CATEGORY_MASK;
34852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            v = (1<<UCharacter.UNASSIGNED);
34862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            invert = true;
34872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
34882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Property name was never matched.
34892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            throw new IllegalArgumentException("Invalid property alias: " + propertyAlias + "=" + valueAlias);
34902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
34912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
                        // Valid property name, but it isn't binary, so the value
                        // must be supplied.
34942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        throw new IllegalArgumentException("Missing property value");
34952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
34962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
34972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
34982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
34992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyIntPropertyValue(p, v);
35012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(invert) {
35022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            complement();
35032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
35042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (mustNotBeEmpty && isEmpty()) {
35062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // mustNotBeEmpty is set to true if an empty set indicates
35072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // invalid input.
35082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("Invalid property value");
35092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
35102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
35122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
35132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
35152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Property set patterns
35162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
35172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
35192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return true if the given position, in the given pattern, appears
35202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to be the start of a property set pattern.
35212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
35222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean resemblesPropertyPattern(String pattern, int pos) {
35232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Patterns are at least 5 characters long
35242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((pos+5) > pattern.length()) {
35252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
35262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
35272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Look for an opening [:, [:^, \p, or \P
35292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return pattern.regionMatches(pos, "[:", 0, 2) ||
35302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pattern.regionMatches(true, pos, "\\p", 0, 2) ||
35312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pattern.regionMatches(pos, "\\N", 0, 2);
35322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
35332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
35352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return true if the given iterator appears to point at a
35362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * property pattern.  Regardless of the result, return with the
35372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * iterator unchanged.
35382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param chars iterator over the pattern characters.  Upon return
35392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * it will be unchanged.
35402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param iterOpts RuleCharacterIterator options
35412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
35422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean resemblesPropertyPattern(RuleCharacterIterator chars,
35432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int iterOpts) {
35442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean result = false;
35452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        iterOpts &= ~RuleCharacterIterator.PARSE_ESCAPES;
35462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Object pos = chars.getPos(null);
35472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c = chars.next(iterOpts);
35482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c == '[' || c == '\\') {
35492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int d = chars.next(iterOpts & ~RuleCharacterIterator.SKIP_WHITESPACE);
35502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result = (c == '[') ? (d == ':') :
35512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (d == 'N' || d == 'p' || d == 'P');
35522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
35532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        chars.setPos(pos);
35542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
35552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
35562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
35582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse the given property pattern at the given parse position.
35592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param symbols TODO
35602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
35612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet applyPropertyPattern(String pattern, ParsePosition ppos, SymbolTable symbols) {
35622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int pos = ppos.getIndex();
35632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // On entry, ppos should point to one of the following locations:
35652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Minimum length is 5 characters, e.g. \p{L}
35672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((pos+5) > pattern.length()) {
35682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
35692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
35702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean posix = false; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
35722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean isName = false; // true for \N{pat}, o/w false
35732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean invert = false;
35742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Look for an opening [:, [:^, \p, or \P
35762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (pattern.regionMatches(pos, "[:", 0, 2)) {
35772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            posix = true;
35782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos = PatternProps.skipWhiteSpace(pattern, (pos+2));
35792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (pos < pattern.length() && pattern.charAt(pos) == '^') {
35802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++pos;
35812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                invert = true;
35822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
35832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (pattern.regionMatches(true, pos, "\\p", 0, 2) ||
35842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pattern.regionMatches(pos, "\\N", 0, 2)) {
35852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = pattern.charAt(pos+1);
35862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            invert = (c == 'P');
35872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            isName = (c == 'N');
35882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos = PatternProps.skipWhiteSpace(pattern, (pos+2));
35892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (pos == pattern.length() || pattern.charAt(pos++) != '{') {
35902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Syntax error; "\p" or "\P" not followed by "{"
35912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return null;
35922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
35932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
35942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Open delimiter not seen
35952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
35962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
35972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
35982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Look for the matching close delimiter, either :] or }
35992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int close = pattern.indexOf(posix ? ":]" : "}", pos);
36002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (close < 0) {
36012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Syntax error; close delimiter missing
36022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
36032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
36042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Look for an '=' sign.  If this is present, we will parse a
36062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
36072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // pattern.
36082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int equals = pattern.indexOf('=', pos);
36092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String propName, valueName;
36102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (equals >= 0 && equals < close && !isName) {
36112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Equals seen; parse medium/long pattern
36122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            propName = pattern.substring(pos, equals);
36132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            valueName = pattern.substring(equals+1, close);
36142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
36152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
36172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Handle case where no '=' is seen, and \N{}
36182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            propName = pattern.substring(pos, close);
36192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            valueName = "";
36202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Handle \N{name}
36222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (isName) {
36232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // This is a little inefficient since it means we have to
36242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // parse "na" back to UProperty.NAME even though we already
36252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // know it's UProperty.NAME.  If we refactor the API to
36262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // support args of (int, String) then we can remove
36272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // "na" and make this a little more efficient.
36282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                valueName = propName;
36292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                propName = "na";
36302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
36312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
36322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPropertyAlias(propName, valueName, symbols);
36342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (invert) {
36362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            complement();
36372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
36382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Move to the limit position after the close delimiter
36402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ppos.setIndex(close + (posix ? 2 : 1));
36412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
36432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
36442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
36462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a property pattern.
36472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param chars iterator over the pattern characters.  Upon return
36482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * it will be advanced to the first character after the parsed
36492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pattern, or the end of the iteration if all characters are
36502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * parsed.
36512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param rebuiltPat the pattern that was parsed, rebuilt or
36522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * copied from the input pattern, as appropriate.
36532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param symbols TODO
36542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
36552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void applyPropertyPattern(RuleCharacterIterator chars,
36562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Appendable rebuiltPat, SymbolTable symbols) {
36572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String patStr = chars.lookahead();
36582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ParsePosition pos = new ParsePosition(0);
36592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        applyPropertyPattern(patStr, pos, symbols);
36602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (pos.getIndex() == 0) {
36612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            syntaxError(chars, "Invalid property pattern");
36622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
36632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        chars.jumpahead(pos.getIndex());
36642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        append(rebuiltPat, patStr.substring(0, pos.getIndex()));
36652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
36662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
36682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Case folding API
36692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------
36702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
36722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bitmask for constructor and applyPattern() indicating that
36732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * white space should be ignored.  If set, ignore Unicode Pattern_White_Space characters,
36742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * unless they are quoted or escaped.  This may be ORed together
36752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with other selectors.
36762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
36772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IGNORE_SPACE = 1;
36782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
36792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
36802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bitmask for constructor, applyPattern(), and closeOver()
36812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * indicating letter case.  This may be ORed together with other
36822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * selectors.
36832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
36842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Enable case insensitive matching.  E.g., "[ab]" with this flag
36852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
36862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * match all except 'a', 'A', 'b', and 'B'. This performs a full
36872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * closure over case mappings, e.g. U+017F for s.
36882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
36892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The resulting set is a superset of the input for the code points but
36902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * not for the strings.
36912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * It performs a case mapping closure of the code points and adds
36922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * full case folding strings for the code points, and reduces strings of
36932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the original set to their full case folding equivalents.
36942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
36952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is designed for case-insensitive matches, for example
36962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * in regular expressions. The full code point case closure allows checking of
36972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * an input character directly against the closure set.
36982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Strings are matched by comparing the case-folded form from the closure
36992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set with an incremental case folding of the string in question.
37002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
37012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The closure set will also contain single code points if the original
37022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
37032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is not necessary (that is, redundant) for the above matching method
37042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * but results in the same closure sets regardless of whether the original
37052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * set contained the code point or a string.
37062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
37072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int CASE = 2;
37082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
37102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Alias for UnicodeSet.CASE, for ease of porting from C++ where ICU4C
37112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * also has both USET_CASE and USET_CASE_INSENSITIVE (see uset.h).
37122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #CASE
37132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
37142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int CASE_INSENSITIVE = 2;
37152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
37172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bitmask for constructor, applyPattern(), and closeOver()
37182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * indicating letter case.  This may be ORed together with other
37192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * selectors.
37202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
37212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Enable case insensitive matching.  E.g., "[ab]" with this flag
37222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
37232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
37242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * title-, and uppercase mappings as well as the case folding
37252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of each existing element in the set.
37262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
37272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int ADD_CASE_MAPPINGS = 4;
37282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  add the result of a full case mapping to the set
37302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  use str as a temporary string to avoid constructing one
37312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final void addCaseMapping(UnicodeSet set, int result, StringBuilder full) {
37322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(result >= 0) {
37332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(result > UCaseProps.MAX_STRING_LENGTH) {
37342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // add a single-code point case mapping
37352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                set.add(result);
37362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
37372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // add a string case mapping from full with length result
37382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                set.add(full.toString());
37392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                full.setLength(0);
37402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
37412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
37422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // result < 0: the code point mapped to itself, no need to add it
37432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // see UCaseProps
37442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
37452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
37472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Close this set over the given attribute.  For the attribute
37482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * CASE, the result is to modify this set so that:
37492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
37502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 1. For each character or string 'a' in this set, all strings
37512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 'b' such that foldCase(a) == foldCase(b) are added to this set.
37522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (For most 'a' that are single characters, 'b' will have
37532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * b.length() == 1.)
37542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
37552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 2. For each string 'e' in the resulting set, if e !=
37562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * foldCase(e), 'e' will be removed.
37572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
3758bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin     * Example: [aq\u00DF{Bc}{bC}{Fi}] =&gt; [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
37592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
37602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (Here foldCase(x) refers to the operation
37612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UCharacter.foldCase(x, true), and a == b actually denotes
37622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a.equals(b), not pointer comparison.)
37632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
37642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param attribute bitmask for attributes to close over.
37652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Currently only the CASE bit is supported.  Any undefined bits
37662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * are ignored.
37672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to this set.
37682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
37692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet closeOver(int attribute) {
37702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
37712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
37722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UCaseProps csp = UCaseProps.INSTANCE;
37732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet foldSet = new UnicodeSet(this);
37742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ULocale root = ULocale.ROOT;
37752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // start with input set to guarantee inclusion
37772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // CASE: remove strings because the strings will actually be reduced (folded);
37782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //       therefore, start with no strings and add only those needed
37792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((attribute & CASE) != 0) {
37802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                foldSet.strings.clear();
37812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
37822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int n = getRangeCount();
37842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int result;
37852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuilder full = new StringBuilder();
37862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i=0; i<n; ++i) {
37882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int start = getRangeStart(i);
37892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int end   = getRangeEnd(i);
37902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
37912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((attribute & CASE) != 0) {
37922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // full case closure
37932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int cp=start; cp<=end; ++cp) {
37942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        csp.addCaseClosure(cp, foldSet);
37952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
37962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
37972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // add case mappings
37982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // (does not add long s for regular s, or Kelvin for k, for example)
37992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int cp=start; cp<=end; ++cp) {
38003ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert                        result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
38012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        addCaseMapping(foldSet, result, full);
38022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
38033ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert                        result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
38042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        addCaseMapping(foldSet, result, full);
38052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
38063ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert                        result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
38072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        addCaseMapping(foldSet, result, full);
38082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
38092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        result = csp.toFullFolding(cp, full, 0);
38102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        addCaseMapping(foldSet, result, full);
38112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
38122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
38132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
38142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!strings.isEmpty()) {
38152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((attribute & CASE) != 0) {
38162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (String s : strings) {
38172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        String str = UCharacter.foldCase(s, 0);
38182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(!csp.addStringCaseClosure(str, foldSet)) {
38192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            foldSet.add(str); // does not map to code points: add the folded string itself
38202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
38212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
38222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
38232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    BreakIterator bi = BreakIterator.getWordInstance(root);
38242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (String str : strings) {
38253ad5c9d5a53649ad7fbd3b55cdcfef0e6036e56aFredrik Roubert                        // TODO: call lower-level functions
38262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        foldSet.add(UCharacter.toLowerCase(root, str));
38272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        foldSet.add(UCharacter.toTitleCase(root, str, bi));
38282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        foldSet.add(UCharacter.toUpperCase(root, str));
38292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        foldSet.add(UCharacter.foldCase(str, 0));
38302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
38312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
38322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
38332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set(foldSet);
38342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
38352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
38362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
38372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
38382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
38392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Internal class for customizing UnicodeSet parsing of properties.
38402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * TODO: extend to allow customizing of codepoint ranges
38412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @author medavis
3842836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
38432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
38442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    abstract public static class XSymbolTable implements SymbolTable {
38452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
38462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Default constructor
3847836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller         * @hide draft / provisional / internal are hidden on Android
38482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
38492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public XSymbolTable(){}
38502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
38512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Supplies default implementation for SymbolTable (no action).
3852836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller         * @hide draft / provisional / internal are hidden on Android
38532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3854f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
38552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public UnicodeMatcher lookupMatcher(int i) {
38562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
38572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
38582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
38592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
38602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Override the interpretation of the sequence [:propertyName=propertyValue:] (and its negated and Perl-style
38612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * variant). The propertyName and propertyValue may be existing Unicode aliases, or may not be.
38622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * <p>
38632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * This routine will be called whenever the parsing of a UnicodeSet pattern finds such a
38642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * propertyName+propertyValue combination.
3865f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert         *
38662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param propertyName
38672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *            the name of the property
38682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param propertyValue
38692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *            the name of the property value
38702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param result UnicodeSet value to change
38712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *            a set to which the characters having the propertyName+propertyValue are to be added.
38722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return returns true if the propertyName+propertyValue combination is to be overridden, and the characters
38732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *         with that property have been added to the UnicodeSet, and returns false if the
38742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *         propertyName+propertyValue combination is not recognized (in which case result is unaltered).
3875836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller         * @hide draft / provisional / internal are hidden on Android
38762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
38772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) {
38782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
38792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
38802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
38812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Supplies default implementation for SymbolTable (no action).
3882836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller         * @hide draft / provisional / internal are hidden on Android
38832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3884f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
38852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public char[] lookup(String s) {
38862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
38872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
38882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
38892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Supplies default implementation for SymbolTable (no action).
3890836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller         * @hide draft / provisional / internal are hidden on Android
38912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3892f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
38932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public String parseReference(String text, ParsePosition pos, int limit) {
38942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
38952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
38962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
38972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
38982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
38992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Is this frozen, according to the Freezable interface?
3900f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     *
39012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return value
39022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3903f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
39042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isFrozen() {
39052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (bmpSet != null || stringSpan != null);
39062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
39072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
39092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Freeze this class, according to the Freezable interface.
3910f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     *
39112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this
39122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3913f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
39142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet freeze() {
39152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!isFrozen()) {
39162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Do most of what compact() does before freezing because
39172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // compact() will not work when the set is frozen.
39182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
39192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Delete buffer first to defragment memory less.
39212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer = null;
39222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (list.length > (len + GROW_EXTRA)) {
39232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Make the capacity equal to len or 1.
39242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We don't want to realloc of 0 size.
39252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int capacity = (len == 0) ? 1 : len;
39262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int[] oldList = list;
39272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list = new int[capacity];
39282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = capacity; i-- > 0;) {
39292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    list[i] = oldList[i];
39302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
39312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
39322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Optimize contains() and span() and similar functions.
39342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!strings.isEmpty()) {
39352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                stringSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), UnicodeSetStringSpan.ALL);
39362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
39372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (stringSpan == null || !stringSpan.needsStringSpanUTF16()) {
39382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Optimize for code point spans.
39392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // There are no strings, or
39402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // all strings are irrelevant for span() etc. because
39412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // all of each string's code points are contained in this set.
39422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // However, fully contained strings are relevant for spanAndCount(),
39432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // so we create both objects.
39442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                bmpSet = new BMPSet(list, len);
39452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
39462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
39472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
39482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
39492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
39512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Span a string using this UnicodeSet.
39522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
39532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s The string to be spanned
39542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param spanCondition The span condition
39552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the length of the span
39562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
39572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int span(CharSequence s, SpanCondition spanCondition) {
39582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return span(s, 0, spanCondition);
39592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
39602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
39622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Span a string using this UnicodeSet.
39632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   If the start index is less than 0, span will start from 0.
39642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   If the start index is greater than the string length, span returns the string length.
39652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
39662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s The string to be spanned
39672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start The start index that the span begins
39682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param spanCondition The span condition
39692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the string index which ends the span (i.e. exclusive)
39702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
39712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int span(CharSequence s, int start, SpanCondition spanCondition) {
39722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int end = s.length();
39732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < 0) {
39742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start = 0;
39752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (start >= end) {
39762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return end;
39772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
39782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (bmpSet != null) {
39792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Frozen set without strings, or no string is relevant for span().
39802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return bmpSet.span(s, start, spanCondition, null);
39812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
39822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (stringSpan != null) {
39832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return stringSpan.span(s, start, spanCondition);
39842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (!strings.isEmpty()) {
39852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
39862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    : UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
39872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
39882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (strSpan.needsStringSpanUTF16()) {
39892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return strSpan.span(s, start, spanCondition);
39902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
39912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
39922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return spanCodePointsAndCount(s, start, spanCondition, null);
39942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
39952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
39962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
39972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Same as span() but also counts the smallest number of set elements on any path across the span.
39982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
39992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param outCount An output-only object (must not be null) for returning the count.
40002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the limit (exclusive end) of the span
40012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
400293cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4003836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
40042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
40052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
40062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition, OutputInt outCount) {
40072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (outCount == null) {
40082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("outCount must not be null");
40092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int end = s.length();
40112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (start < 0) {
40122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start = 0;
40132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (start >= end) {
40142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return end;
40152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (stringSpan != null) {
40172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We might also have bmpSet != null,
40182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // but fully-contained strings are relevant for counting elements.
40192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return stringSpan.spanAndCount(s, start, spanCondition, outCount);
40202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (bmpSet != null) {
40212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return bmpSet.span(s, start, spanCondition, outCount);
40222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (!strings.isEmpty()) {
40232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
40242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    : UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
40252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            which |= UnicodeSetStringSpan.WITH_COUNT;
40262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
40272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return strSpan.spanAndCount(s, start, spanCondition, outCount);
40282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
40302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return spanCodePointsAndCount(s, start, spanCondition, outCount);
40312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
40322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
40332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int spanCodePointsAndCount(CharSequence s, int start,
40342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            SpanCondition spanCondition, OutputInt outCount) {
40352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Pin to 0/1 values.
40362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED);
40372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
40382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c;
40392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int next = start;
40402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int length = s.length();
40412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int count = 0;
40422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        do {
40432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c = Character.codePointAt(s, next);
40442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (spanContained != contains(c)) {
40452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
40462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
40472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++count;
40482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            next += Character.charCount(c);
40492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } while (next < length);
40502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (outCount != null) { outCount.value = count; }
40512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return next;
40522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
40532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
40542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
40552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Span a string backwards (from the end) using this UnicodeSet.
40562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
40572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s The string to be spanned
40582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param spanCondition The span condition
40592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The string index which starts the span (i.e. inclusive).
40602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
40612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int spanBack(CharSequence s, SpanCondition spanCondition) {
40622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return spanBack(s, s.length(), spanCondition);
40632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
40642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
40652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
40662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Span a string backwards (from the fromIndex) using this UnicodeSet.
40672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If the fromIndex is less than 0, spanBack will return 0.
40682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If fromIndex is greater than the string length, spanBack will start from the string length.
40692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>To replace, count elements, or delete spans, see {@link android.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
40702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s The string to be spanned
40712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param fromIndex The index of the char (exclusive) that the string should be spanned backwards
40722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param spanCondition The span condition
40732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The string index which starts the span (i.e. inclusive).
40742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
40752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int spanBack(CharSequence s, int fromIndex, SpanCondition spanCondition) {
40762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fromIndex <= 0) {
40772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
40782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fromIndex > s.length()) {
40802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fromIndex = s.length();
40812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (bmpSet != null) {
40832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Frozen set without strings, or no string is relevant for spanBack().
40842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return bmpSet.spanBack(s, fromIndex, spanCondition);
40852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (stringSpan != null) {
40872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return stringSpan.spanBack(s, fromIndex, spanCondition);
40882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (!strings.isEmpty()) {
40892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int which = (spanCondition == SpanCondition.NOT_CONTAINED)
40902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ? UnicodeSetStringSpan.BACK_UTF16_NOT_CONTAINED
40912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            : UnicodeSetStringSpan.BACK_UTF16_CONTAINED;
40922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
40932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (strSpan.needsStringSpanUTF16()) {
40942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return strSpan.spanBack(s, fromIndex, spanCondition);
40952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
40962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
40972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
40982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Pin to 0/1 values.
40992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED);
41002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c;
41022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prev = fromIndex;
41032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        do {
41042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c = Character.codePointBefore(s, prev);
41052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (spanContained != contains(c)) {
41062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
41072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
41082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prev -= Character.charCount(c);
41092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } while (prev > 0);
41102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return prev;
41112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
41122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
41142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Clone a thawed version of this class, according to the Freezable interface.
41152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the clone, not frozen
41162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4117f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
41182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet cloneAsThawed() {
41192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet result = new UnicodeSet(this);
41202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert !result.isFrozen();
41212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
41222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
41232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // internal function
41252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void checkFrozen() {
41262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (isFrozen()) {
41272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new UnsupportedOperationException("Attempt to modify frozen object");
41282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
41292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
41302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // ************************
41322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Additional methods for integration with Generics and Collections
41332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // ************************
41342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
41362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A struct-like class used for iteration through ranges, for faster iteration than by String.
41372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Read about the restrictions on usage in {@link UnicodeSet#ranges()}.
41382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
41392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static class EntryRange {
41402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
41412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * The starting code point of the range.
41422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
41432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int codepoint;
41442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
41452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * The ending code point of the range
41462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
41472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int codepointEnd;
41482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        EntryRange() {
41502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
41512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
41532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * {@inheritDoc}
41542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
41552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        @Override
41562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public String toString() {
41572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuilder b = new StringBuilder();
4158f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            return (
41592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    codepoint == codepointEnd ? _appendToPat(b, codepoint, false)
41602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            : _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false))
41612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            .toString();
41622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
41632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
41642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
41662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Provide for faster iteration than by String. Returns an Iterable/Iterator over ranges of code points.
41672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The UnicodeSet must not be altered during the iteration.
41682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The EntryRange instance is the same each time; the contents are just reset.
41692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
41702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p><b>Warning: </b>To iterate over the full contents, you have to also iterate over the strings.
41712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
4172f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
41732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Do not alter the UnicodeSet while iterating.
4174f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     *
41752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <pre>
41762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * // Sample code
41772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for (EntryRange range : us1.ranges()) {
41782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *     // do something with code points between range.codepoint and range.codepointEnd;
41792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * }
41802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for (String s : us1.strings()) {
41812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *     // do something with each string;
41822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * }
41832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * </pre>
41842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
41852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Iterable<EntryRange> ranges() {
41862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return new EntryRangeIterable();
41872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
41882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private class EntryRangeIterable implements Iterable<EntryRange> {
4190f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
41912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public Iterator<EntryRange> iterator() {
41922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return new EntryRangeIterator();
41932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
41942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
41952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
41962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private class EntryRangeIterator implements Iterator<EntryRange> {
41972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int pos;
41982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        EntryRange result = new EntryRange();
41992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4200f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
42012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean hasNext() {
42022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return pos < len-1;
42032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4204f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
42052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public EntryRange next() {
42062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (pos < len-1) {
42072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.codepoint = list[pos++];
42082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.codepointEnd = list[pos++]-1;
42092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
42102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new NoSuchElementException();
42112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
42122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
42132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4214f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
42152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void remove() {
42162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new UnsupportedOperationException();
42172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
42182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
42192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
42202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
42212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
42222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns a string iterator. Uses the same order of iteration as {@link UnicodeSetIterator}.
4223f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
42242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Do not alter the UnicodeSet while iterating.
42252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see java.util.Set#iterator()
42262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4227f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
42282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Iterator<String> iterator() {
42292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return new UnicodeSetIterator2(this);
42302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
42312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4232f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    // Cover for string iteration.
42332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class UnicodeSetIterator2 implements Iterator<String> {
42342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Invariants:
42352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // sourceList != null then sourceList[item] is a valid character
42362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // sourceList == null then delegates to stringIterator
42372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int[] sourceList;
42382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int len;
42392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int item;
42402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int current;
42412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int limit;
42422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private TreeSet<String> sourceStrings;
42432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private Iterator<String> stringIterator;
42442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private char[] buffer;
42452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
42462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSetIterator2(UnicodeSet source) {
42472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // set according to invariants
42482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            len = source.len - 1;
42492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (len > 0) {
42502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceStrings = source.strings;
42512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceList = source.list;
42522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                current = sourceList[item++];
42532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = sourceList[item++];
42542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
42552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                stringIterator = source.strings.iterator();
42562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceList = null;
42572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
42582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
42592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
42602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* (non-Javadoc)
42612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @see java.util.Iterator#hasNext()
42622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
4263f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
42642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean hasNext() {
42652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return sourceList != null || stringIterator.hasNext();
42662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
42672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
42682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* (non-Javadoc)
42692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @see java.util.Iterator#next()
42702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
4271f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
42722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public String next() {
42732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (sourceList == null) {
42742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return stringIterator.next();
42752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
42762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int codepoint = current++;
42772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // we have the codepoint we need, but we may need to adjust the state
42782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (current >= limit) {
42792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (item >= len) {
42802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    stringIterator = sourceStrings.iterator();
42812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sourceList = null;
42822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
42832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    current = sourceList[item++];
42842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    limit = sourceList[item++];
42852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
42862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
42872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Now return. Single code point is easy
42882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (codepoint <= 0xFFFF) {
42892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return String.valueOf((char)codepoint);
42902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
42912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // But Java lacks a valueOfCodePoint, so we handle ourselves for speed
42922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // allocate a buffer the first time, to make conversion faster.
42932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (buffer == null) {
42942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer = new char[2];
42952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
42962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // compute ourselves, to save tests and calls
42972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int offset = codepoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;
42982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer[0] = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
42992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer[1] = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
43002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return String.valueOf(buffer);
43012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
43022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* (non-Javadoc)
43042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @see java.util.Iterator#remove()
43052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
4306f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
43072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void remove() {
43082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new UnsupportedOperationException();
4309f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        }
43102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #containsAll(android.icu.text.UnicodeSet)
43142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
43152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public <T extends CharSequence> boolean containsAll(Iterable<T> collection) {
43162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (T o : collection) {
43172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!contains(o)) {
43182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
43192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
43202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
43212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
43222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #containsNone(android.icu.text.UnicodeSet)
43262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
43272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public <T extends CharSequence> boolean containsNone(Iterable<T> collection) {
43282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (T o : collection) {
43292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (contains(o)) {
43302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
43312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
43322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
43332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
43342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #containsAll(android.icu.text.UnicodeSet)
43382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
43392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final <T extends CharSequence> boolean containsSome(Iterable<T> collection) {
43402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !containsNone(collection);
43412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #addAll(android.icu.text.UnicodeSet)
43452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
43462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @SuppressWarnings("unchecked")  // See ticket #11395, this is safe.
43472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public <T extends CharSequence> UnicodeSet addAll(T... collection) {
43482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
43492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (T str : collection) {
43502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            add(str);
43512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
43522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
43532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #removeAll(android.icu.text.UnicodeSet)
43582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
43592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public <T extends CharSequence> UnicodeSet removeAll(Iterable<T> collection) {
43602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
43612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (T o : collection) {
43622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            remove(o);
43632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
43642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
43652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #retainAll(android.icu.text.UnicodeSet)
43692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
43702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public <T extends CharSequence> UnicodeSet retainAll(Iterable<T> collection) {
43712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkFrozen();
43722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO optimize
43732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet toRetain = new UnicodeSet();
43742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        toRetain.addAll(collection);
43752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        retainAll(toRetain);
43762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
43772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
43782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
/**
 * Comparison style enums used by {@link UnicodeSet#compareTo(UnicodeSet, ComparisonStyle)}.
 */
public enum ComparisonStyle {
    /**
     * Sets of different size are ordered by size, smaller set first;
     * sets of equal size are ordered by their contents.
     */
    SHORTER_FIRST,
    /**
     * Size is ignored; sets are ordered by their contents only.
     */
    LEXICOGRAPHIC,
    /**
     * Sets of different size are ordered by size, larger set first;
     * sets of equal size are ordered by their contents.
     */
    LONGER_FIRST;
}
43932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
43942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
43952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compares UnicodeSets, where shorter come first, and otherwise lexigraphically
43962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (according to the comparison of the first characters that differ).
43972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see java.lang.Comparable#compareTo(java.lang.Object)
43982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4399f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
44002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int compareTo(UnicodeSet o) {
44012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return compareTo(o, ComparisonStyle.SHORTER_FIRST);
44022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
44032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
44042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compares UnicodeSets, in three different ways.
44052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see java.lang.Comparable#compareTo(java.lang.Object)
44062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
44072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int compareTo(UnicodeSet o, ComparisonStyle style) {
44082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (style != ComparisonStyle.LEXICOGRAPHIC) {
44092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int diff = size() - o.size();
44102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (diff != 0) {
44112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return (diff < 0) == (style == ComparisonStyle.SHORTER_FIRST) ? -1 : 1;
44122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
44132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
44142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int result;
44152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; ; ++i) {
44162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (0 != (result = list[i] - o.list[i])) {
44172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // if either list ran out, compare to the last string
44182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (list[i] == HIGH) {
44192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (strings.isEmpty()) return 1;
44202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String item = strings.first();
44212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return compare(item, o.list[i]);
44222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
44232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (o.list[i] == HIGH) {
44242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (o.strings.isEmpty()) return -1;
44252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String item = o.strings.first();
4426f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    int compareResult = compare(item, list[i]);
4427f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    return compareResult > 0 ? -1 : compareResult < 0 ? 1 : 0; // Reverse the order.
44282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
44292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // otherwise return the result if even index, or the reversal if not
44302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return (i & 1) == 0 ? result : -result;
44312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
44322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (list[i] == HIGH) {
44332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
44342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
44352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
44362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return compare(strings, o.strings);
44372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
44382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
44402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
44412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int compareTo(Iterable<String> other) {
44422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return compare(this, other);
44432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
44442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
44462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to compare a string to a code point.
44472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
4448f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * and comparing, but much faster (no object creation).
44492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Actually, there is one difference; a null compares as less.
44502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Note that this (=String) order is UTF-16 order -- *not* code point order.
445139fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
44522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
44532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int compare(CharSequence string, int codePoint) {
44552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return CharSequences.compare(string, codePoint);
44562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
44572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
44592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to compare a string to a code point.
4460f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * Same results as turning the code point into a string and comparing, but much faster (no object creation).
44612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Actually, there is one difference; a null compares as less.
44622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Note that this (=String) order is UTF-16 order -- *not* code point order.
446339fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
44642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
44652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int compare(int codePoint, CharSequence string) {
44662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -CharSequences.compare(string, codePoint);
44672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
44682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
44712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to compare two iterables. Warning: the ordering in iterables is important. For Collections that are ordered,
44722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * like Lists, that is expected. However, Sets in Java violate Leibniz's law when it comes to iteration.
44732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * That means that sets can't be compared directly with this method, unless they are TreeSets without
44742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (or with the same) comparator. Unfortunately, it is impossible to reliably detect in Java whether subclass of
44752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Collection satisfies the right criteria, so it is left to the user to avoid those circumstances.
447639fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
44772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
44782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Comparable<T>> int compare(Iterable<T> collection1, Iterable<T> collection2) {
44792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return compare(collection1.iterator(), collection2.iterator());
44802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
44812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
44832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to compare two iterators. Warning: the ordering in iterables is important. For Collections that are ordered,
44842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * like Lists, that is expected. However, Sets in Java violate Leibniz's law when it comes to iteration.
44852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * That means that sets can't be compared directly with this method, unless they are TreeSets without
44862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (or with the same) comparator. Unfortunately, it is impossible to reliably detect in Java whether subclass of
44872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Collection satisfies the right criteria, so it is left to the user to avoid those circumstances.
44882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
448993cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4490836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
44912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
44922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
44932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Comparable<T>> int compare(Iterator<T> first, Iterator<T> other) {
44942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (true) {
44952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!first.hasNext()) {
44962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return other.hasNext() ? -1 : 0;
44972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (!other.hasNext()) {
44982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 1;
44992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
45002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            T item1 = first.next();
45012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            T item2 = other.next();
45022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int result = item1.compareTo(item2);
45032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (result != 0) {
45042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return result;
45052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
45062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
45072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
45112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to compare two collections, optionally by size, and then lexicographically.
451239fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
45132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Comparable<T>> int compare(Collection<T> collection1, Collection<T> collection2, ComparisonStyle style) {
45152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (style != ComparisonStyle.LEXICOGRAPHIC) {
45162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int diff = collection1.size() - collection2.size();
45172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (diff != 0) {
45182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return (diff < 0) == (style == ComparisonStyle.SHORTER_FIRST) ? -1 : 1;
45192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
45202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
45212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return compare(collection1, collection2);
45222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
45252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility for adding the contents of an iterable to a collection.
452639fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
45272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T, U extends Collection<T>> U addAllTo(Iterable<T> source, U target) {
45292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (T item : source) {
45302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            target.add(item);
45312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
45322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return target;
45332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
45362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility for adding the contents of an iterable to a collection.
453739fda05a2af93ea1422c26c0e570d6d7b4a4f4eeJoachim Sauer     * @hide unsupported on Android
45382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T> T[] addAllTo(Iterable<T> source, T[] target) {
45402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 0;
45412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (T item : source) {
45422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            target[i++] = item;
45432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
45442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return target;
45452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
45482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For iterating through the strings in the set. Example:
45492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <pre>
45502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for (String key : myUnicodeSet.strings()) {
45512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   doSomethingWith(key);
45522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * }
45532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * </pre>
45542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Collection<String> strings() {
45562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return Collections.unmodifiableSortedSet(strings);
45572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
45602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
45612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
456293cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4563836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
45642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
45662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int getSingleCodePoint(CharSequence s) {
45672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return CharSequences.getSingleCodePoint(s);
45682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4571f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * Simplify the ranges in a Unicode set by merging any ranges that are only separated by characters in the dontCare set.
4572f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * For example, the ranges: \\u2E80-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3000-\\u303E change to \\u2E80-\\u303E
45732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * if the dontCare set includes unassigned characters (for a particular version of Unicode).
45742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param dontCare Set with the don't-care characters for spanning
45752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the input set, modified
45762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
457793cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4578836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
45792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
45812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeSet addBridges(UnicodeSet dontCare) {
45822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet notInInput = new UnicodeSet(this).complement();
45832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (UnicodeSetIterator it = new UnicodeSetIterator(notInInput); it.nextRange();) {
45842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (it.codepoint != 0 && it.codepoint != UnicodeSetIterator.IS_STRING && it.codepointEnd != 0x10FFFF && dontCare.contains(it.codepoint,it.codepointEnd)) {
45852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                add(it.codepoint,it.codepointEnd);
45862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
45872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
45882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
45892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
45902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
45912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
45922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Find the first index at or after fromIndex where the UnicodeSet matches at that index.
45932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If findNot is true, then reverse the sense of the match: find the first place where the UnicodeSet doesn't match.
45942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If there is no match, length is returned.
45952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only. Use span instead.
459693cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4597836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
45982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
45992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
46002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int findIn(CharSequence value, int fromIndex, boolean findNot) {
46012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //TODO add strings, optimize, using ICU4C algorithms
46022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
46032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (; fromIndex < value.length(); fromIndex += UTF16.getCharCount(cp)) {
46042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(value, fromIndex);
46052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (contains(cp) != findNot) {
46062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
46072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
46082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
46092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fromIndex;
46102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
46112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
46122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
46132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Find the last index before fromIndex where the UnicodeSet matches at that index.
46142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If findNot is true, then reverse the sense of the match: find the last place where the UnicodeSet doesn't match.
46152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If there is no match, -1 is returned.
46162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * BEFORE index is not in the UnicodeSet.
46172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only. Use spanBack instead.
461893cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4619836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
46202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
46212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
46222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int findLastIn(CharSequence value, int fromIndex, boolean findNot) {
46232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //TODO add strings, optimize, using ICU4C algorithms
46242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
46252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fromIndex -= 1;
46262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (; fromIndex >= 0; fromIndex -= UTF16.getCharCount(cp)) {
46272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(value, fromIndex);
46282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (contains(cp) != findNot) {
46292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
46302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
46312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
46322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fromIndex < 0 ? -1 : fromIndex;
46332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
46342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
46352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
46362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Strips code points from source. If matches is true, script all that match <i>this</i>. If matches is false, then strip all that <i>don't</i> match.
46372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source The source of the CharSequence to strip from.
46382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param matches A boolean to either strip all that matches or don't match with the current UnicodeSet object.
46392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The string after it has been stripped.
46402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only. Use replaceFrom.
464193cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4642836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
46432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
46442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
46452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String stripFrom(CharSequence source, boolean matches) {
46462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder();
46472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int pos = 0; pos < source.length();) {
46482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int inside = findIn(source, pos, !matches);
46492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append(source.subSequence(pos, inside));
46502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos = findIn(source, inside, matches); // get next start
46512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
46522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
46532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
46542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Argument values for whether span() and similar functions continue while the current character is contained vs.
     * not contained in the set.
     * <p>
     * The functionality is straightforward for sets with only single code points, without strings (which is the common
     * case):
     * <ul>
     * <li>CONTAINED and SIMPLE work the same.
     * <li>CONTAINED and SIMPLE are inverses of NOT_CONTAINED.
     * <li>span() and spanBack() partition any string the
     * same way when alternating between span(NOT_CONTAINED) and span(either "contained" condition).
     * <li>Using a
     * complemented (inverted) set and the opposite span conditions yields the same results.
     * </ul>
     * When a set contains multi-code point strings, then these statements may not be true, depending on the strings in
     * the set (for example, whether they overlap with each other) and the string that is processed. For a set with
     * strings:
     * <ul>
     * <li>The complement of the set contains the opposite set of code points, but the same set of strings.
     * Therefore, complementing both the set and the span conditions may yield different results.
     * <li>When starting spans
     * at different positions in a string (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
     * because a set string may start before the later position.
     * <li>span(SIMPLE) may be shorter than
     * span(CONTAINED) because it will not recursively try all possible paths. For example, with a set which
     * contains the three strings "xy", "xya" and "ax", span("xyax", CONTAINED) will return 4 but span("xyax",
     * SIMPLE) will return 3. span(SIMPLE) will never be longer than span(CONTAINED).
     * <li>With either "contained" condition, span() and spanBack() may partition a string in different ways. For example,
     * with a set which contains the two strings "ab" and "ba", and when processing the string "aba", span() will yield
     * contained/not-contained boundaries of { 0, 2, 3 } while spanBack() will yield boundaries of { 0, 1, 3 }.
     * </ul>
     * Note: If it is important to get the same boundaries whether iterating forward or backward through a string, then
     * either only span() should be used and the boundaries cached for backward operation, or an ICU BreakIterator could
     * be used.
     * <p>
     * Note: Unpaired surrogates are treated like surrogate code points. Similarly, set strings match only on code point
     * boundaries, never in the middle of a surrogate pair.
     */
    public enum SpanCondition {
        /**
         * Continues a span() while there is no set element at the current position.
         * Increments by one code point at a time.
         * Stops before the first set element (character or string).
         * (For code points only, this is like while contains(current)==false).
         * <p>
         * When span() returns, the substring between where it started and the position it returned consists only of
         * characters that are not in the set, and none of its strings overlap with the span.
         */
        NOT_CONTAINED,

        /**
         * Spans the longest substring that is a concatenation of set elements (characters or strings).
         * (For characters only, this is like while contains(current)==true).
         * <p>
         * When span() returns, the substring between where it started and the position it returned consists only of set
         * elements (characters or strings) that are in the set.
         * <p>
         * If a set contains strings, then the span will be the longest substring for which there
         * exists at least one non-overlapping concatenation of set elements (characters or strings).
         * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
         * (Java/ICU/Perl regex stops at the first match of an OR.)
         */
        CONTAINED,

        /**
         * Continues a span() while there is a set element at the current position.
         * Increments by the longest matching element at each position.
         * (For characters only, this is like while contains(current)==true).
         * <p>
         * When span() returns, the substring between where it started and the position it returned consists only of set
         * elements (characters or strings) that are in the set.
         * <p>
         * If a set only contains single characters, then this is the same as CONTAINED.
         * <p>
         * If a set contains strings, then the span will be the longest substring with a match at each position with the
         * longest single set element (character or string).
         * <p>
         * Use this span condition together with other longest-match algorithms, such as ICU converters
         * (ucnv_getUnicodeSet()).
         */
        SIMPLE,

        /**
         * One more than the last span condition.
         * This constant is declared after NOT_CONTAINED, CONTAINED and SIMPLE and serves as a count
         * of them; it does not describe a span behavior of its own.
         */
        CONDITION_COUNT
    }
47422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
47432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
47442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Get the default symbol table. Null means ordinary processing. For internal use only.
47452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the symbol table
47462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
474793cf604e9dd0525f15bc0a7450b2a35f3884c298Neil Fuller     * @hide original deprecated declaration
4748836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
47492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
47502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
47512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static XSymbolTable getDefaultXSymbolTable() {
47522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return XSYMBOL_TABLE;
47532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
47542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Set the default symbol table. Null means ordinary processing. For internal use only. Will affect all subsequent parsing
     * of UnicodeSets.
     * <p>
     * Calling this method also discards the cached inclusions (they are reset to null), whatever value is passed in.
     * <p>
     * WARNING: If this function is used with a UnicodeProperty, and the
     * Unassigned characters (gc=Cn) are different than in ICU, you MUST call
     * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
     * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
     *
     * @param xSymbolTable the new default symbol table.
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
        INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated.
        XSYMBOL_TABLE = xSymbolTable;
    }
47742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
47752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller//eof
4776