12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 2013-2015, International Business Machines 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Corporation and others. All Rights Reserved. 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* CollationRuleParser.java, ported from collationruleparser.h/.cpp 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C++ version created on: 2013apr10 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* created by: Markus W. 
Scherer 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl.coll; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParseException; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.IllegalIcuArgumentException; 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.PatternProps; 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter; 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UProperty; 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Collator; 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Normalizer2; 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16; 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet; 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale; 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 301537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/** 311537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android 32836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */ 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class CollationRuleParser { 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Special reset positions. 
*/ 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller enum Position { 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_TERTIARY_IGNORABLE, 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_TERTIARY_IGNORABLE, 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_SECONDARY_IGNORABLE, 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_SECONDARY_IGNORABLE, 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_PRIMARY_IGNORABLE, 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_PRIMARY_IGNORABLE, 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_VARIABLE, 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_VARIABLE, 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_REGULAR, 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_REGULAR, 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_IMPLICIT, 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_IMPLICIT, 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller FIRST_TRAILING, 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller LAST_TRAILING 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final Position[] POSITION_VALUES = Position.values(); 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * First character of contractions that encode special reset positions. 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * U+FFFE cannot be tailored via rule syntax. 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The second contraction character is POS_BASE + Position. 
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final char POS_LEAD = 0xfffe; 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Base for the second character of contractions that encode special reset positions. 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Braille characters U+28xx are printable and normalization-inert. 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see POS_LEAD 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final char POS_BASE = 0x2800; 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static abstract class Sink { 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds a reset. 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * strength=UCOL_IDENTICAL for &str. 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3. 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller abstract void addReset(int strength, CharSequence str); 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Adds a relation with strength and prefix | str / extension. 
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller abstract void addRelation(int strength, CharSequence prefix, 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharSequence str, CharSequence extension); 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void suppressContractions(UnicodeSet set) {} 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void optimize(UnicodeSet set) {} 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller interface Importer { 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String getRules(String localeID, String collationType); 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Constructor. 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The Sink must be set before parsing. 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The Importer can be set, otherwise [import locale] syntax is not supported. 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CollationRuleParser(CollationData base) { 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller baseData = base; 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets the pointer to a Sink object. 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The pointer is aliased: Pointer copy without cloning or taking ownership. 
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void setSink(Sink sinkAlias) { 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sink = sinkAlias; 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets the pointer to an Importer object. 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The pointer is aliased: Pointer copy without cloning or taking ownership. 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void setImporter(Importer importerAlias) { 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller importer = importerAlias; 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void parse(String ruleString, CollationSettings outSettings) throws ParseException { 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings = outSettings; 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parse(ruleString); 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int UCOL_DEFAULT = -1; 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int UCOL_OFF = 0; 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int UCOL_ON = 1; 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** UCOL_PRIMARY=0 .. 
UCOL_IDENTICAL=15 */
private static final int STRENGTH_MASK = 0xf;
// Set in a parseRelationOperator() result when the operator is followed by '*'.
private static final int STARRED_FLAG = 0x10;
// A parseRelationOperator() result stores the operator length above this many bits.
private static final int OFFSET_SHIFT = 8;

// Start of the "[before n]" modifier that may follow the '&' of a reset.
private static final String BEFORE = "[before";

// In C++, we parse into temporary UnicodeString objects named "raw" or "str".
// In Java, we reuse this StringBuilder.
private final StringBuilder rawBuilder = new StringBuilder();

/**
 * Top-level parse loop: walks the rule string and dispatches on the first
 * non-white-space character of each item.
 *
 * @param ruleString the rules to parse; stored in {@code rules}
 * @throws ParseException declared; see setParseError() and the parse helpers
 */
private void parse(String ruleString) throws ParseException {
    rules = ruleString;
    ruleIndex = 0;

    while(ruleIndex < rules.length()) {
        final char ch = rules.charAt(ruleIndex);
        if(PatternProps.isWhiteSpace(ch)) {
            ++ruleIndex;
        } else if(ch == '&') {
            // reset, followed by its chain of relations
            parseRuleChain();
        } else if(ch == '[') {
            parseSetting();
        } else if(ch == '#') {
            // '#' starts a comment, until the end of the line
            ruleIndex = skipComment(ruleIndex + 1);
        } else if(ch == '@') {
            // '@' is equivalent to [backwards 2]
            settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
            ++ruleIndex;
        } else if(ch == '!') {
            // '!' used to turn on Thai/Lao character reversal.
            // Accept but ignore. The root collator has contractions
            // that are equivalent to the character reversal, where appropriate.
            ++ruleIndex;
        } else {
            setParseError("expected a reset or setting or comment");
        }
    }
}

/**
 * Parses one reset and all of the relations that follow it.
 * Comments between relations are skipped. The chain ends at the first
 * item that is neither a relation operator nor a comment.
 *
 * @throws ParseException declared; see setParseError() and the parse helpers
 */
private void parseRuleChain() throws ParseException {
    final int resetStrength = parseResetAndPosition();
    // Anything below IDENTICAL means the reset had a [before n] modifier.
    final boolean isResetBefore = resetStrength < Collator.IDENTICAL;
    boolean sawRelation = false;
    while(true) {
        final int op = parseRelationOperator();
        if(op < 0) {
            // No relation operator here.
            if(ruleIndex < rules.length() && rules.charAt(ruleIndex) == '#') {
                // '#' starts a comment, until the end of the line
                ruleIndex = skipComment(ruleIndex + 1);
                continue;
            }
            if(!sawRelation) {
                setParseError("reset not followed by a relation");
            }
            return;
        }
        final int strength = op & STRENGTH_MASK;
        if(isResetBefore) {
            // reset-before rule chain
            if(sawRelation) {
                if(strength < resetStrength) {
                    setParseError("reset-before strength followed by a stronger relation");
                    return;
                }
            } else if(strength != resetStrength) {
                setParseError("reset-before strength differs from its first relation");
                return;
            }
        }
        // skip over the relation operator
        final int afterOperator = ruleIndex + (op >> OFFSET_SHIFT);
        if((op & STARRED_FLAG) != 0) {
            parseStarredCharacters(strength, afterOperator);
        } else {
            parseRelationStrings(strength, afterOperator);
        }
        sawRelation = true;
    }
}

/**
 * Parses a reset: the optional "[before n]" modifier and the reset position,
 * which is either a special position in square brackets or a tailoring string.
 * Expects {@code ruleIndex} to be at the '&'. Reports the reset to the sink
 * and, on success, moves {@code ruleIndex} past the position.
 *
 * @return Collator.IDENTICAL for a plain reset, Collator.PRIMARY + (n - 1) for
 *         "[before n]", or UCOL_DEFAULT after an error has been reported
 * @throws ParseException declared; see setParseError() and the parse helpers
 */
private int parseResetAndPosition() throws ParseException {
    int pos = skipWhiteSpace(ruleIndex + 1);
    int resetStrength = Collator.IDENTICAL;
    if(rules.regionMatches(pos, BEFORE, 0, BEFORE.length())) {
        int k = pos + BEFORE.length();
        // "[before" must be followed by white space, one digit 1..3, and ']'.
        if(k < rules.length() && PatternProps.isWhiteSpace(rules.charAt(k))) {
            k = skipWhiteSpace(k + 1);
            if(k + 1 < rules.length()) {
                final char digit = rules.charAt(k);
                if('1' <= digit && digit <= '3' && rules.charAt(k + 1) == ']') {
                    // &[before n] with n=1 or 2 or 3
                    resetStrength = Collator.PRIMARY + (digit - '1');
                    pos = skipWhiteSpace(k + 2);
                }
            }
        }
    }
    if(pos >= rules.length()) {
        setParseError("reset without position");
        return UCOL_DEFAULT;
    }
    pos = (rules.charAt(pos) == '[')
            ? parseSpecialPosition(pos, rawBuilder)
            : parseTailoringString(pos, rawBuilder);
    try {
        sink.addReset(resetStrength, rawBuilder);
    } catch(Exception e) {
        setParseError("adding reset failed", e);
        return UCOL_DEFAULT;
    }
    ruleIndex = pos;
    return resetStrength;
}

/**
 * Recognizes a relation operator at the next non-white-space position.
 * Moves {@code ruleIndex} past leading white space but not past the operator.
 *
 * @return UCOL_DEFAULT if there is no operator; otherwise the operator length
 *         shifted left by OFFSET_SHIFT, combined with the strength and, for a
 *         starred operator, STARRED_FLAG
 */
private int parseRelationOperator() {
    ruleIndex = skipWhiteSpace(ruleIndex);
    if(ruleIndex >= rules.length()) {
        return UCOL_DEFAULT;
    }
    int end = ruleIndex + 1;  // index after the operator characters seen so far
    int strength;
    boolean starAllowed = true;
    switch(rules.charAt(ruleIndex)) {
    case '<': {
        // Up to three more '<' may follow: <, <<, <<<, <<<<
        int extra = 0;
        while(extra < 3 && end < rules.length() && rules.charAt(end) == '<') {
            ++end;
            ++extra;
        }
        if(extra == 0) {
            strength = Collator.PRIMARY;
        } else if(extra == 1) {
            strength = Collator.SECONDARY;
        } else if(extra == 2) {
            strength = Collator.TERTIARY;
        } else {
            strength = Collator.QUATERNARY;
        }
        break;
    }
    case ';':  // same as <<
        strength = Collator.SECONDARY;
        starAllowed = false;
        break;
    case ',':  // same as <<<
        strength = Collator.TERTIARY;
        starAllowed = false;
        break;
    case '=':
        strength = Collator.IDENTICAL;
        break;
    default:
        return UCOL_DEFAULT;
    }
    if(starAllowed && end < rules.length() && rules.charAt(end) == '*') {
        ++end;
        strength |= STARRED_FLAG;
    }
    return ((end - ruleIndex) << OFFSET_SHIFT) | strength;
}

/**
 * Parses the operand of a non-starred relation,
 *     prefix | str / extension
 * where prefix and extension are optional, and reports it to the sink.
 * On success, moves {@code ruleIndex} past the operand.
 *
 * @param strength the strength of the relation
 * @param i the index just after the relation operator
 * @throws ParseException declared; see setParseError() and parseTailoringString()
 */
private void parseRelationStrings(int strength, int i) throws ParseException {
    int pos = parseTailoringString(i, rawBuilder);

    String prefix = "";
    if(pos < rules.length() && rules.charAt(pos) == '|') {
        // '|' separates the context prefix from the string.
        prefix = rawBuilder.toString();
        pos = parseTailoringString(pos + 1, rawBuilder);
    }

    // str = rawBuilder (do not modify rawBuilder any more in this function)
    CharSequence extension = "";
    if(pos < rules.length() && rules.charAt(pos) == '/') {
        // '/' separates the string from the extension.
        final StringBuilder extBuilder = new StringBuilder();
        pos = parseTailoringString(pos + 1, extBuilder);
        extension = extBuilder;
    }

    if(!prefix.isEmpty()) {
        final int prefixFirst = prefix.codePointAt(0);
        final int strFirst = rawBuilder.codePointAt(0);
        if(!(nfc.hasBoundaryBefore(prefixFirst) && nfc.hasBoundaryBefore(strFirst))) {
            setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary");
            return;
        }
    }

    try {
        sink.addRelation(strength, prefix, rawBuilder, extension);
    } catch(Exception e) {
        setParseError("adding relation failed", e);
        return;
    }
    ruleIndex = pos;
}
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void parseStarredCharacters(int strength, int i) throws ParseException { 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String empty = ""; 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = parseString(skipWhiteSpace(i), rawBuilder); 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(rawBuilder.length() == 0) { 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("missing starred-relation string"); 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prev = -1; 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = 0; 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(j < rawBuilder.length()) { 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = rawBuilder.codePointAt(j); 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!nfd.isInert(c)) { 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("starred-relation string is not all NFD-inert"); 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sink.addRelation(strength, empty, UTF16.valueOf(c), empty); 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("adding relation failed", e); 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j += Character.charCount(c); 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev = c; 
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i >= rules.length() || rules.charAt(i) != 0x2d) { // '-' 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prev < 0) { 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("range without start in starred-relation string"); 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = parseString(i + 1, rawBuilder); 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(rawBuilder.length() == 0) { 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("range without end in starred-relation string"); 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = rawBuilder.codePointAt(0); 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c < prev) { 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("range start greater than end in starred-relation string"); 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // range prev-c 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(++prev <= c) { 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!nfd.isInert(prev)) { 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("starred-relation string range is not all NFD-inert"); 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isSurrogate(prev)) { 
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("starred-relation string range contains a surrogate"); 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0xfffd <= prev && prev <= 0xffff) { 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF"); 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sink.addRelation(strength, empty, UTF16.valueOf(prev), empty); 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("adding relation failed", e); 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev = -1; 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j = Character.charCount(c); 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = skipWhiteSpace(i); 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int parseTailoringString(int i, StringBuilder raw) throws ParseException { 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = parseString(skipWhiteSpace(i), raw); 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.length() == 0) { 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("missing relation string"); 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return skipWhiteSpace(i); 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int parseString(int i, StringBuilder raw) throws ParseException { 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.setLength(0); 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(i < rules.length()) { 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = rules.charAt(i++); 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isSyntaxChar(c)) { 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c == 0x27) { // apostrophe 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i < rules.length() && rules.charAt(i) == 0x27) { 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Double apostrophe, encodes a single one. 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.append((char)0x27); 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Quote literal text until the next single apostrophe. 
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i == rules.length()) { 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("quoted literal text missing terminating apostrophe"); 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = rules.charAt(i++); 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c == 0x27) { 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i < rules.length() && rules.charAt(i) == 0x27) { 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Double apostrophe inside quoted literal text, 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // still encodes a single apostrophe. 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.append(c); 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c == 0x5c) { // backslash 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i == rules.length()) { 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("backslash escape at the end of the rule string"); 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = rules.codePointAt(i); 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.appendCodePoint(cp); 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i += Character.charCount(cp); 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else 
{ 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Any other syntax character terminates a string. 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --i; 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(PatternProps.isWhiteSpace(c)) { 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Unquoted white space terminates a string. 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --i; 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.append(c); 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int j = 0; j < raw.length();) { 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = raw.codePointAt(j); 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isSurrogate(c)) { 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("string contains an unpaired surrogate"); 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0xfffd <= c && c <= 0xffff) { 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("string contains U+FFFD, U+FFFE or U+FFFF"); 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j += Character.charCount(c); 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Widen UTF16.isSurrogate(char16) to take an int. 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final boolean isSurrogate(int c) { 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c & 0xfffff800) == 0xd800; 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String[] positions = { 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first tertiary ignorable", 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last tertiary ignorable", 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first secondary ignorable", 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last secondary ignorable", 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first primary ignorable", 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last primary ignorable", 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first variable", 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last variable", 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first regular", 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last regular", 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first implicit", 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last implicit", 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "first trailing", 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "last trailing" 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets str to a contraction of U+FFFE and (U+2800 + Position). 
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return rule index after the special reset position 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @throws ParseException 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int parseSpecialPosition(int i, StringBuilder str) throws ParseException { 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = readWords(i + 1, rawBuilder); 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(j > i && rules.charAt(j) == 0x5d && rawBuilder.length() != 0) { // words end with ] 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++j; 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String raw = rawBuilder.toString(); 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int pos = 0; pos < positions.length; ++pos) { 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.equals(positions[pos])) { 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(POS_LEAD).append((char)(POS_BASE + pos)); 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return j; 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.equals("top")) { 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_REGULAR.ordinal())); 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return j; 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.equals("variable top")) { 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_VARIABLE.ordinal())); 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return j; 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller } 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("not a valid special reset position"); 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void parseSetting() throws ParseException { 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = ruleIndex + 1; 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = readWords(i, rawBuilder); 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(j <= i || rawBuilder.length() == 0) { 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("expected a setting/option at '['"); 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // startsWith() etc. are available for String but not CharSequence/StringBuilder. 
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String raw = rawBuilder.toString(); 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(rules.charAt(j) == 0x5d) { // words end with ] 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++j; 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.startsWith("reorder") && 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (raw.length() == 7 || raw.charAt(7) == 0x20)) { 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parseReordering(raw); 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.equals("backwards 2")) { 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true); 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String v; 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int valueIndex = raw.lastIndexOf(0x20); 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(valueIndex >= 0) { 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = raw.substring(valueIndex + 1); 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw = raw.substring(0, valueIndex); 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller v = ""; 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.equals("strength") && v.length() == 1) { 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = UCOL_DEFAULT; 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = v.charAt(0); 
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0x31 <= c && c <= 0x34) { // 1..4 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = Collator.PRIMARY + (c - 0x31); 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c == 0x49) { // 'I' 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = Collator.IDENTICAL; 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setStrength(value); 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("alternate")) { 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = UCOL_DEFAULT; 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(v.equals("non-ignorable")) { 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = 0; // UCOL_NON_IGNORABLE 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(v.equals("shifted")) { 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = 1; // UCOL_SHIFTED 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setAlternateHandlingShifted(value > 0); 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("maxVariable")) { 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = UCOL_DEFAULT; 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(v.equals("space")) { 
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = CollationSettings.MAX_VAR_SPACE; 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(v.equals("punct")) { 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = CollationSettings.MAX_VAR_PUNCT; 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(v.equals("symbol")) { 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = CollationSettings.MAX_VAR_SYMBOL; 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(v.equals("currency")) { 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = CollationSettings.MAX_VAR_CURRENCY; 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setMaxVariable(value, 0); 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.variableTop = baseData.getLastPrimaryForGroup( 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Collator.ReorderCodes.FIRST + value); 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller assert(settings.variableTop != 0); 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("caseFirst")) { 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = UCOL_DEFAULT; 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(v.equals("off")) { 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = UCOL_OFF; 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(v.equals("lower")) { 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = CollationSettings.CASE_FIRST; // UCOL_LOWER_FIRST 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(v.equals("upper")) { 
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = CollationSettings.CASE_FIRST_AND_UPPER_MASK; // UCOL_UPPER_FIRST 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setCaseFirst(value); 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("caseLevel")) { 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = getOnOffValue(v); 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setFlag(CollationSettings.CASE_LEVEL, value > 0); 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("normalization")) { 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = getOnOffValue(v); 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setFlag(CollationSettings.CHECK_FCD, value > 0); 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("numericOrdering")) { 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = getOnOffValue(v); 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
settings.setFlag(CollationSettings.NUMERIC, value > 0); 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("hiraganaQ")) { 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = getOnOffValue(v); 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value != UCOL_DEFAULT) { 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(value == UCOL_ON) { 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("[hiraganaQ on] is not supported"); 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("import")) { 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // BCP 47 language tag -> ICU locale ID 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ULocale localeID; 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller localeID = new ULocale.Builder().setLanguageTag(v).build(); 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("expected language tag in [import langTag]", e); 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // localeID minus all keywords 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String baseID = localeID.getBaseName(); 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // @collation=type, or length=0 if not specified 
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String collationType = localeID.getKeywordValue("collation"); 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(importer == null) { 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("[import langTag] is not supported"); 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String importedRules; 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller importedRules = 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller importer.getRules(baseID, 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller collationType != null ? collationType : "standard"); 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("[import langTag] failed", e); 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String outerRules = rules; 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int outerRuleIndex = ruleIndex; 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parse(importedRules); 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = outerRuleIndex; // Restore the original index for error reporting. 
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("parsing imported rules failed", e); 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rules = outerRules; 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(rules.charAt(j) == 0x5b) { // words end with [ 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet set = new UnicodeSet(); 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j = parseUnicodeSet(j, set); 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.equals("optimize")) { 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sink.optimize(set); 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("[optimize set] failed", e); 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(raw.equals("suppressContractions")) { 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sink.suppressContractions(set); 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("[suppressContractions set] failed", e); 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ruleIndex = j; 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("not a valid setting/option"); 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void parseReordering(CharSequence raw) throws ParseException { 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 7; // after "reorder" 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i == raw.length()) { 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // empty [reorder] with no codes 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.resetReordering(); 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Parse the codes in [reorder aa bb cc]. 
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ArrayList<Integer> reorderCodes = new ArrayList<Integer>(); 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(i < raw.length()) { 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; // skip the word-separating space 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit = i; 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(limit < raw.length() && raw.charAt(limit) != ' ') { ++limit; } 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String word = raw.subSequence(i, limit).toString(); 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int code = getReorderCode(word); 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(code < 0) { 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("unknown script or reorder code"); 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderCodes.add(code); 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = limit; 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(reorderCodes.isEmpty()) { 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.resetReordering(); 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] codes = new int[reorderCodes.size()]; 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = 0; 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(Integer code : reorderCodes) { codes[j++] = code; } 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller settings.setReordering(baseData, codes); 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller private static final String[] gSpecialReorderCodes = { 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller "space", "punct", "symbol", "currency", "digit" 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Gets a script or reorder code from its string representation. 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the script/reorder code, or 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * -1 if not recognized 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int getReorderCode(String word) { 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = 0; i < gSpecialReorderCodes.length; ++i) { 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(word.equalsIgnoreCase(gSpecialReorderCodes[i])) { 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return Collator.ReorderCodes.FIRST + i; 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word); 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(script >= 0) { 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return script; 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IllegalIcuArgumentException e) { 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fall through 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(word.equalsIgnoreCase("others")) { 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 
Collator.ReorderCodes.OTHERS; // same as Zzzz = USCRIPT_UNKNOWN 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int getOnOffValue(String s) { 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(s.equals("on")) { 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UCOL_ON; 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(s.equals("off")) { 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UCOL_OFF; 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UCOL_DEFAULT; 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int parseUnicodeSet(int i, UnicodeSet set) throws ParseException { 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Collect a UnicodeSet pattern between a balanced pair of [brackets]. 
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int level = 0; 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = i; 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(j == rules.length()) { 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("unbalanced UnicodeSet pattern brackets"); 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return j; 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = rules.charAt(j++); 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c == 0x5b) { // '[' 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++level; 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c == 0x5d) { // ']' 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(--level == 0) { break; } 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.applyPattern(rules.substring(i, j)); 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(Exception e) { 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("not a valid UnicodeSet pattern: " + e.getMessage()); 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller j = skipWhiteSpace(j); 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(j == rules.length() || rules.charAt(j) != 0x5d) { 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setParseError("missing option-terminating ']' after UnicodeSet pattern"); 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return j; 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ++j; 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int readWords(int i, StringBuilder raw) { 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.setLength(0); 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = skipWhiteSpace(i); 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(i >= rules.length()) { return 0; } 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = rules.charAt(i); 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) { // syntax except -_ 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.length() == 0) { return i; } 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastIndex = raw.length() - 1; 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(raw.charAt(lastIndex) == ' ') { // remove trailing space 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.setLength(lastIndex); 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(PatternProps.isWhiteSpace(c)) { 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.append(' '); 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = skipWhiteSpace(i + 1); 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller raw.append(c); 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int skipComment(int i) { 
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // skip to past the newline 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(i < rules.length()) { 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = rules.charAt(i++); 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // LF or FF or CR or NEL or LS or PS 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) { 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS." 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // NLF (new line function) = CR or LF or CR+LF or NEL. 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // No need to collect all of CR+LF because a following LF will be ignored anyway. 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void setParseError(String reason) throws ParseException { 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw makeParseException(reason); 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void setParseError(String reason, Exception e) throws ParseException { 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ParseException newExc = makeParseException(reason + ": " + e.getMessage()); 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newExc.initCause(e); 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw newExc; 
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private ParseException makeParseException(String reason) { 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return new ParseException(appendErrorContext(reason), ruleIndex); 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int U_PARSE_CONTEXT_LEN = 16; 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // C++ setErrorContext() 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String appendErrorContext(String reason) { 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: This relies on the calling code maintaining the ruleIndex 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // at a position that is useful for debugging. 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For example, at the beginning of a reset or relation etc. 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder msg = new StringBuilder(reason); 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller msg.append(" at index ").append(ruleIndex); 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We are not counting line numbers. 
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller msg.append(" near \""); 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // before ruleIndex 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1); 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start < 0) { 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start = 0; 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(start > 0 && Character.isLowSurrogate(rules.charAt(start))) { 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++start; 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller msg.append(rules, start, ruleIndex); 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller msg.append('!'); 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // starting from ruleIndex 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = rules.length() - ruleIndex; 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length >= U_PARSE_CONTEXT_LEN) { 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller length = U_PARSE_CONTEXT_LEN - 1; 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Character.isHighSurrogate(rules.charAt(ruleIndex + length - 1))) { 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --length; 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller msg.append(rules, ruleIndex, ruleIndex + length); 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return msg.append('\"').toString(); 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller * ASCII [:P:] and [:S:]: 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E] 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isSyntaxChar(int c) { 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x21 <= c && c <= 0x7e && 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (c <= 0x2f || (0x3a <= c && c <= 0x40) || 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (0x5b <= c && c <= 0x60) || (0x7b <= c)); 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int skipWhiteSpace(int i) { 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(i < rules.length() && PatternProps.isWhiteSpace(rules.charAt(i))) { 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Normalizer2 nfd = Normalizer2.getNFDInstance(); 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Normalizer2 nfc = Normalizer2.getNFCInstance(); 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String rules; 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final CollationData baseData; 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CollationSettings settings; 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Sink sink; 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Importer importer; 
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int ruleIndex; 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 921