12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 2001-2004, International Business Machines Corporation and * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved. * 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text; 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Utility; 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * An object that matches a fixed input string, implementing the 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeMatcher API. This object also implements the 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeReplacer API, allowing it to emit the matched text as 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * output. Since the match text may contain flexible match elements, 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * such as UnicodeSets, the emitted text is not the match pattern, but 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * instead a substring of the actual matched text. Following 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * convention, the output text is the leftmost match seen up to this 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * point. 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A StringMatcher may represent a segment, in which case it has a 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * positive segment number. This affects how the matcher converts 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * itself to a pattern but does not otherwise affect its function. 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A StringMatcher that is not a segment should not be used as a 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeReplacer. 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerclass StringMatcher implements UnicodeMatcher, UnicodeReplacer { 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The text to be matched. 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String pattern; 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Start offset, in the match text, of the <em>rightmost</em> 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * match. 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int matchStart; 42f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Limit offset, in the match text, of the <em>rightmost</em> 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * match. 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int matchLimit; 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The segment number, 1-based, or 0 if not a segment. 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int segmentNumber; 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Context object that maps stand-ins to matcher and replacer 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * objects. 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final RuleBasedTransliterator.Data data; 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a matcher that matches the given pattern string. 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param theString the pattern to be matched, possibly containing 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * stand-ins that represent nested UnicodeMatcher objects. 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param segmentNum the segment number from 1..n, or 0 if this is 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * not a segment. 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param theData context object mapping stand-ins to 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeMatcher objects. 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public StringMatcher(String theString, 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int segmentNum, 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleBasedTransliterator.Data theData) { 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller data = theData; 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pattern = theString; 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matchStart = matchLimit = -1; 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller segmentNumber = segmentNum; 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a matcher that matches a substring of the given 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern string. 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param theString the pattern to be matched, possibly containing 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * stand-ins that represent nested UnicodeMatcher objects. 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start first character of theString to be matched 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param limit index after the last character of theString to be 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * matched. 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param segmentNum the segment number from 1..n, or 0 if this is 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * not a segment. 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param theData context object mapping stand-ins to 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeMatcher objects. 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public StringMatcher(String theString, 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start, 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit, 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int segmentNum, 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleBasedTransliterator.Data theData) { 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(theString.substring(start, limit), segmentNum, theData); 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implement UnicodeMatcher 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int matches(Replaceable text, 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] offset, 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit, 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean incremental) { 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note (1): We process text in 16-bit code units, rather than 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 32-bit code points. This works because stand-ins are 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // always in the BMP and because we are doing a literal match 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // operation, which can be done 16-bits at a time. 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] cursor = new int[] { offset[0] }; 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (limit < cursor[0]) { 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Match in the reverse direction 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i=pattern.length()-1; i>=0; --i) { 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char keyChar = pattern.charAt(i); // OK; see note (1) above 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher subm = data.lookupMatcher(keyChar); 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (subm == null) { 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cursor[0] > limit && 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller keyChar == text.charAt(cursor[0])) { // OK; see note (1) above 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --cursor[0]; 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_MISMATCH; 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int m = 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller subm.matches(text, cursor, limit, incremental); 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m != U_MATCH) { 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return m; 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Record the match position, but adjust for a normal 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // forward start, limit, and only if a prior match does not 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // exist -- we want the rightmost match. 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (matchStart < 0) { 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matchStart = cursor[0]+1; 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matchLimit = offset[0]+1; 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i=0; i<pattern.length(); ++i) { 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (incremental && cursor[0] == limit) { 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We've reached the context limit without a mismatch and 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // without completing our match. 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_PARTIAL_MATCH; 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char keyChar = pattern.charAt(i); // OK; see note (1) above 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher subm = data.lookupMatcher(keyChar); 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (subm == null) { 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Don't need the cursor < limit check if 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // incremental is true (because it's done above); do need 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // it otherwise. 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cursor[0] < limit && 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller keyChar == text.charAt(cursor[0])) { // OK; see note (1) above 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++cursor[0]; 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_MISMATCH; 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int m = 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller subm.matches(text, cursor, limit, incremental); 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m != U_MATCH) { 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return m; 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Record the match position 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matchStart = offset[0]; 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matchLimit = cursor[0]; 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset[0] = cursor[0]; 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return U_MATCH; 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implement UnicodeMatcher 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 179f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String toPattern(boolean escapeUnprintable) { 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer result = new StringBuffer(); 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer quoteBuf = new StringBuffer(); 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (segmentNumber > 0) { // i.e., if this is a segment 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('('); 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<pattern.length(); ++i) { 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char keyChar = pattern.charAt(i); // OK; see note (1) above 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher m = data.lookupMatcher(keyChar); 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (m == null) { 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Utility.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf); 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Utility.appendToRule(result, m.toPattern(escapeUnprintable), 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, escapeUnprintable, quoteBuf); 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (segmentNumber > 0) { // i.e., if this is a segment 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(')'); 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Flush quoteBuf out to result 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Utility.appendToRule(result, -1, 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, escapeUnprintable, quoteBuf); 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result.toString(); 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implement UnicodeMatcher 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 208f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean matchesIndexValue(int v) { 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pattern.length() == 0) { 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = UTF16.charAt(pattern, 0); 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher m = data.lookupMatcher(c); 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (m == null) ? ((c & 0xFF) == v) : m.matchesIndexValue(v); 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Implementation of UnicodeMatcher API. Union the set of all 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters that may be matched by this object into the given 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set. 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param toUnionTo the set into which to union the source characters 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 224f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void addMatchSetTo(UnicodeSet toUnionTo) { 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ch; 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<pattern.length(); i+=UTF16.getCharCount(ch)) { 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch = UTF16.charAt(pattern, i); 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher matcher = data.lookupMatcher(ch); 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (matcher == null) { 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller toUnionTo.add(ch); 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matcher.addMatchSetTo(toUnionTo); 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeReplacer API 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 241f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int replace(Replaceable text, 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start, 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit, 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] cursor) { 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int outLen = 0; 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Copy segment with out-of-band data 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int dest = limit; 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there was no match, that means that a quantifier 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // matched zero-length. E.g., x (a)* y matched "xy". 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (matchStart >= 0) { 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (matchStart != matchLimit) { 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller text.copy(matchStart, matchLimit, dest); 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller outLen = matchLimit - matchStart; 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller text.replace(start, limit, ""); // delete original text 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return outLen; 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UnicodeReplacer API 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 268f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String toReplacerPattern(boolean escapeUnprintable) { 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(segmentNumber > 0); 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer rule = new StringBuffer("$"); 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Utility.appendNumber(rule, segmentNumber, 10, 1); 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return rule.toString(); 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Remove any match data. This must be called before performing a 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * set of matches with this segment. 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void resetMatch() { 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller matchStart = matchLimit = -1; 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Union the set of all characters that may output by this object 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * into the given set. 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param toUnionTo the set into which to union the output characters 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 289f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void addReplacementSetTo(UnicodeSet toUnionTo) { 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The output of this replacer varies; it is the source text between 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // matchStart and matchLimit. Since this varies depending on the 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // input text, we can't compute it here. We can either do nothing 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or we can add ALL characters to the set. It's probably more useful 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to do nothing. 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller//eof 300