17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 3f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* CollationRuleParser.java, ported from collationruleparser.h/.cpp 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2013apr10 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.ParseException; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.IllegalIcuArgumentException; 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.PatternProps; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty; 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer2; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16; 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet; 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class CollationRuleParser { 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** Special reset positions. */ 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert enum Position { 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_TERTIARY_IGNORABLE, 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_TERTIARY_IGNORABLE, 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_SECONDARY_IGNORABLE, 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_SECONDARY_IGNORABLE, 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_PRIMARY_IGNORABLE, 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_PRIMARY_IGNORABLE, 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_VARIABLE, 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_VARIABLE, 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_REGULAR, 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_REGULAR, 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_IMPLICIT, 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_IMPLICIT, 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FIRST_TRAILING, 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LAST_TRAILING 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final Position[] POSITION_VALUES = Position.values(); 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * First character of contractions that encode special reset positions. 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * U+FFFE cannot be tailored via rule syntax. 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The second contraction character is POS_BASE + Position. 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final char POS_LEAD = 0xfffe; 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Base for the second character of contractions that encode special reset positions. 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Braille characters U+28xx are printable and normalization-inert. 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see POS_LEAD 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final char POS_BASE = 0x2800; 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static abstract class Sink { 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Adds a reset. 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * strength=UCOL_IDENTICAL for &str. 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3. 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract void addReset(int strength, CharSequence str); 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Adds a relation with strength and prefix | str / extension. 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract void addRelation(int strength, CharSequence prefix, 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharSequence str, CharSequence extension); 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void suppressContractions(UnicodeSet set) {} 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void optimize(UnicodeSet set) {} 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert interface Importer { 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String getRules(String localeID, String collationType); 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructor. 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The Sink must be set before parsing. 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The Importer can be set, otherwise [import locale] syntax is not supported. 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationRuleParser(CollationData base) { 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert baseData = base; 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Sets the pointer to a Sink object. 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The pointer is aliased: Pointer copy without cloning or taking ownership. 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void setSink(Sink sinkAlias) { 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink = sinkAlias; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Sets the pointer to an Importer object. 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The pointer is aliased: Pointer copy without cloning or taking ownership. 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void setImporter(Importer importerAlias) { 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert importer = importerAlias; 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void parse(String ruleString, CollationSettings outSettings) throws ParseException { 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings = outSettings; 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parse(ruleString); 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int UCOL_DEFAULT = -1; 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int UCOL_OFF = 0; 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int UCOL_ON = 1; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */ 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int STRENGTH_MASK = 0xf; 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int STARRED_FLAG = 0x10; 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int OFFSET_SHIFT = 8; 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String BEFORE = "[before"; 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // In C++, we parse into temporary UnicodeString objects named "raw" or "str". 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // In Java, we reuse this StringBuilder. 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final StringBuilder rawBuilder = new StringBuilder(); 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void parse(String ruleString) throws ParseException { 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rules = ruleString; 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = 0; 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(ruleIndex < rules.length()) { 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = rules.charAt(ruleIndex); 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(PatternProps.isWhiteSpace(c)) { 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++ruleIndex; 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert switch(c) { 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x26: // '&' 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parseRuleChain(); 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x5b: // '[' 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parseSetting(); 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x23: // '#' starts a comment, until the end of the line 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = skipComment(ruleIndex + 1); 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x40: // '@' is equivalent to [backwards 2] 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true); 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++ruleIndex; 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x21: // '!' used to turn on Thai/Lao character reversal 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Accept but ignore. The root collator has contractions 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // that are equivalent to the character reversal, where appropriate. 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++ruleIndex; 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert default: 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("expected a reset or setting or comment"); 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void parseRuleChain() throws ParseException { 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int resetStrength = parseResetAndPosition(); 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isFirstRelation = true; 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int result = parseRelationOperator(); 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(result < 0) { 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(ruleIndex < rules.length() && rules.charAt(ruleIndex) == 0x23) { 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // '#' starts a comment, until the end of the line 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = skipComment(ruleIndex + 1); 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isFirstRelation) { 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("reset not followed by a relation"); 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int strength = result & STRENGTH_MASK; 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(resetStrength < Collator.IDENTICAL) { 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reset-before rule chain 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isFirstRelation) { 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(strength != resetStrength) { 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("reset-before strength differs from its first relation"); 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(strength < resetStrength) { 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("reset-before strength followed by a stronger relation"); 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int i = ruleIndex + (result >> OFFSET_SHIFT); // skip over the relation operator 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if((result & STARRED_FLAG) == 0) { 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parseRelationStrings(strength, i); 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parseStarredCharacters(strength, i); 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert isFirstRelation = false; 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int parseResetAndPosition() throws ParseException { 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int i = skipWhiteSpace(ruleIndex + 1); 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int j; 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c; 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int resetStrength; 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(rules.regionMatches(i, BEFORE, 0, BEFORE.length()) && 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (j = i + BEFORE.length()) < rules.length() && 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert PatternProps.isWhiteSpace(rules.charAt(j)) && 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ((j = skipWhiteSpace(j + 1)) + 1) < rules.length() && 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 0x31 <= (c = rules.charAt(j)) && c <= 0x33 && 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rules.charAt(j + 1) == 0x5d) { 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // &[before n] with n=1 or 2 or 3 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert resetStrength = Collator.PRIMARY + (c - 0x31); 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = skipWhiteSpace(j + 2); 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert resetStrength = Collator.IDENTICAL; 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i >= rules.length()) { 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("reset without position"); 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCOL_DEFAULT; 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(rules.charAt(i) == 0x5b) { // '[' 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseSpecialPosition(i, rawBuilder); 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseTailoringString(i, rawBuilder); 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink.addReset(resetStrength, rawBuilder); 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("adding reset failed", e); 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCOL_DEFAULT; 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = i; 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return resetStrength; 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int parseRelationOperator() { 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = skipWhiteSpace(ruleIndex); 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(ruleIndex >= rules.length()) { return UCOL_DEFAULT; } 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int strength; 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int i = ruleIndex; 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = rules.charAt(i++); 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert switch(c) { 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x3c: // '<' 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x3c) { // << 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x3c) { // <<< 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x3c) { // <<<< 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.QUATERNARY; 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.TERTIARY; 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.SECONDARY; 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.PRIMARY; 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x2a) { // '*' 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength |= STARRED_FLAG; 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x3b: // ';' same as << 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.SECONDARY; 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x2c: // ',' same as <<< 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.TERTIARY; 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 0x3d: // '=' 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength = Collator.IDENTICAL; 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x2a) { // '*' 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strength |= STARRED_FLAG; 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert default: 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCOL_DEFAULT; 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ((i - ruleIndex) << OFFSET_SHIFT) | strength; 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void parseRelationStrings(int strength, int i) throws ParseException { 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Parse 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // prefix | str / extension 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // where prefix and extension are optional. 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String prefix = ""; 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharSequence extension = ""; 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseTailoringString(i, rawBuilder); 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char next = (i < rules.length()) ? rules.charAt(i) : 0; 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(next == 0x7c) { // '|' separates the context prefix from the string. 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prefix = rawBuilder.toString(); 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseTailoringString(i + 1, rawBuilder); 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert next = (i < rules.length()) ? rules.charAt(i) : 0; 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // str = rawBuilder (do not modify rawBuilder any more in this function) 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(next == 0x2f) { // '/' separates the string from the extension. 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder extBuilder = new StringBuilder(); 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseTailoringString(i + 1, extBuilder); 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert extension = extBuilder; 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(prefix.length() != 0) { 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int prefix0 = prefix.codePointAt(0); 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = rawBuilder.codePointAt(0); 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) { 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary"); 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink.addRelation(strength, prefix, rawBuilder, extension); 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("adding relation failed", e); 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = i; 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void parseStarredCharacters(int strength, int i) throws ParseException { 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String empty = ""; 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseString(skipWhiteSpace(i), rawBuilder); 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(rawBuilder.length() == 0) { 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("missing starred-relation string"); 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int prev = -1; 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int j = 0; 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(j < rawBuilder.length()) { 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = rawBuilder.codePointAt(j); 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!nfd.isInert(c)) { 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("starred-relation string is not all NFD-inert"); 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink.addRelation(strength, empty, UTF16.valueOf(c), empty); 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("adding relation failed", e); 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert j += Character.charCount(c); 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prev = c; 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i >= rules.length() || rules.charAt(i) != 0x2d) { // '-' 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(prev < 0) { 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("range without start in starred-relation string"); 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseString(i + 1, rawBuilder); 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(rawBuilder.length() == 0) { 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("range without end in starred-relation string"); 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = rawBuilder.codePointAt(0); 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c < prev) { 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("range start greater than end in starred-relation string"); 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // range prev-c 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(++prev <= c) { 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!nfd.isInert(prev)) { 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("starred-relation string range is not all NFD-inert"); 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isSurrogate(prev)) { 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("starred-relation string range contains a surrogate"); 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(0xfffd <= prev && prev <= 0xffff) { 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF"); 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink.addRelation(strength, empty, UTF16.valueOf(prev), empty); 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("adding relation failed", e); 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prev = -1; 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert j = Character.charCount(c); 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = skipWhiteSpace(i); 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int parseTailoringString(int i, StringBuilder raw) throws ParseException { 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = parseString(skipWhiteSpace(i), raw); 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.length() == 0) { 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("missing relation string"); 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return skipWhiteSpace(i); 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int parseString(int i, StringBuilder raw) throws ParseException { 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.setLength(0); 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(i < rules.length()) { 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = rules.charAt(i++); 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isSyntaxChar(c)) { 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c == 0x27) { // apostrophe 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x27) { 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Double apostrophe, encodes a single one. 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.append((char)0x27); 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Quote literal text until the next single apostrophe. 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i == rules.length()) { 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("quoted literal text missing terminating apostrophe"); 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert c = rules.charAt(i++); 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c == 0x27) { 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i < rules.length() && rules.charAt(i) == 0x27) { 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Double apostrophe inside quoted literal text, 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // still encodes a single apostrophe. 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.append(c); 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(c == 0x5c) { // backslash 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i == rules.length()) { 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("backslash escape at the end of the rule string"); 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int cp = rules.codePointAt(i); 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.appendCodePoint(cp); 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i += Character.charCount(cp); 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Any other syntax character terminates a string. 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --i; 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(PatternProps.isWhiteSpace(c)) { 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Unquoted white space terminates a string. 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --i; 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.append(c); 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int j = 0; j < raw.length();) { 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = raw.codePointAt(j); 4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isSurrogate(c)) { 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("string contains an unpaired surrogate"); 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(0xfffd <= c && c <= 0xffff) { 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("string contains U+FFFD, U+FFFE or U+FFFF"); 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert j += Character.charCount(c); 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: Widen UTF16.isSurrogate(char16) to take an int. 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final boolean isSurrogate(int c) { 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (c & 0xfffff800) == 0xd800; 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String[] positions = { 4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first tertiary ignorable", 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last tertiary ignorable", 4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first secondary ignorable", 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last secondary ignorable", 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first primary ignorable", 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last primary ignorable", 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first variable", 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last variable", 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first regular", 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last regular", 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first implicit", 4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last implicit", 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "first trailing", 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "last trailing" 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Sets str to a contraction of U+FFFE and (U+2800 + Position). 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return rule index after the special reset position 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws ParseException 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int parseSpecialPosition(int i, StringBuilder str) throws ParseException { 4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int j = readWords(i + 1, rawBuilder); 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(j > i && rules.charAt(j) == 0x5d && rawBuilder.length() != 0) { // words end with ] 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++j; 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String raw = rawBuilder.toString(); 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.setLength(0); 5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int pos = 0; pos < positions.length; ++pos) { 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.equals(positions[pos])) { 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.append(POS_LEAD).append((char)(POS_BASE + pos)); 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return j; 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.equals("top")) { 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_REGULAR.ordinal())); 5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return j; 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.equals("variable top")) { 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_VARIABLE.ordinal())); 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return j; 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("not a valid special reset position"); 5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void parseSetting() throws ParseException { 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int i = ruleIndex + 1; 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int j = readWords(i, rawBuilder); 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(j <= i || rawBuilder.length() == 0) { 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("expected a setting/option at '['"); 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // startsWith() etc. are available for String but not CharSequence/StringBuilder. 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String raw = rawBuilder.toString(); 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(rules.charAt(j) == 0x5d) { // words end with ] 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++j; 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.startsWith("reorder") && 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (raw.length() == 7 || raw.charAt(7) == 0x20)) { 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parseReordering(raw); 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.equals("backwards 2")) { 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true); 5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String v; 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int valueIndex = raw.lastIndexOf(0x20); 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(valueIndex >= 0) { 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert v = raw.substring(valueIndex + 1); 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw = raw.substring(0, valueIndex); 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert v = ""; 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.equals("strength") && v.length() == 1) { 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = UCOL_DEFAULT; 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = v.charAt(0); 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(0x31 <= c && c <= 0x34) { // 1..4 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = Collator.PRIMARY + (c - 0x31); 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(c == 0x49) { // 'I' 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = Collator.IDENTICAL; 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setStrength(value); 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("alternate")) { 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = UCOL_DEFAULT; 5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(v.equals("non-ignorable")) { 5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = 0; // UCOL_NON_IGNORABLE 5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(v.equals("shifted")) { 5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = 1; // UCOL_SHIFTED 5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setAlternateHandlingShifted(value > 0); 5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("maxVariable")) { 5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = UCOL_DEFAULT; 5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(v.equals("space")) { 5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = CollationSettings.MAX_VAR_SPACE; 5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(v.equals("punct")) { 5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = CollationSettings.MAX_VAR_PUNCT; 5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(v.equals("symbol")) { 5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = CollationSettings.MAX_VAR_SYMBOL; 5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(v.equals("currency")) { 5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = CollationSettings.MAX_VAR_CURRENCY; 5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setMaxVariable(value, 0); 5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.variableTop = baseData.getLastPrimaryForGroup( 5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collator.ReorderCodes.FIRST + value); 5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert(settings.variableTop != 0); 5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("caseFirst")) { 5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = UCOL_DEFAULT; 5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(v.equals("off")) { 5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = UCOL_OFF; 5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(v.equals("lower")) { 5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = CollationSettings.CASE_FIRST; // UCOL_LOWER_FIRST 5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(v.equals("upper")) { 6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = CollationSettings.CASE_FIRST_AND_UPPER_MASK; // UCOL_UPPER_FIRST 6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setCaseFirst(value); 6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("caseLevel")) { 6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = getOnOffValue(v); 6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setFlag(CollationSettings.CASE_LEVEL, value > 0); 6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("normalization")) { 6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = getOnOffValue(v); 6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setFlag(CollationSettings.CHECK_FCD, value > 0); 6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("numericOrdering")) { 6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = getOnOffValue(v); 6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.setFlag(CollationSettings.NUMERIC, value > 0); 6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("hiraganaQ")) { 6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value = getOnOffValue(v); 6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value != UCOL_DEFAULT) { 6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value == UCOL_ON) { 6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("[hiraganaQ on] is not supported"); 6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("import")) { 6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // BCP 47 language tag -> ICU locale ID 6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale localeID; 6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert localeID = new ULocale.Builder().setLanguageTag(v).build(); 6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("expected language tag in [import langTag]", e); 6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // localeID minus all keywords 6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String baseID = localeID.getBaseName(); 6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // @collation=type, or length=0 if not specified 6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String collationType = localeID.getKeywordValue("collation"); 6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(importer == null) { 6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("[import langTag] is not supported"); 6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String importedRules; 6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert importedRules = 6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert importer.getRules(baseID, 6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collationType != null ? collationType : "standard"); 6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("[import langTag] failed", e); 6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String outerRules = rules; 6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int outerRuleIndex = ruleIndex; 6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parse(importedRules); 6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = outerRuleIndex; // Restore the original index for error reporting. 6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("parsing imported rules failed", e); 6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rules = outerRules; 6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(rules.charAt(j) == 0x5b) { // words end with [ 6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet set = new UnicodeSet(); 6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert j = parseUnicodeSet(j, set); 6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.equals("optimize")) { 6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink.optimize(set); 6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("[optimize set] failed", e); 6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(raw.equals("suppressContractions")) { 6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sink.suppressContractions(set); 6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("[suppressContractions set] failed", e); 6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleIndex = j; 6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("not a valid setting/option"); 6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void parseReordering(CharSequence raw) throws ParseException { 7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int i = 7; // after "reorder" 7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i == raw.length()) { 7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // empty [reorder] with no codes 7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.resetReordering(); 7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Parse the codes in [reorder aa bb cc]. 7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ArrayList<Integer> reorderCodes = new ArrayList<Integer>(); 7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(i < raw.length()) { 7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; // skip the word-separating space 7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = i; 7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(limit < raw.length() && raw.charAt(limit) != ' ') { ++limit; } 7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String word = raw.subSequence(i, limit).toString(); 7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int code = getReorderCode(word); 7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(code < 0) { 7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("unknown script or reorder code"); 7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert reorderCodes.add(code); 7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = limit; 7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 721f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert if(reorderCodes.isEmpty()) { 7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert settings.resetReordering(); 723f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert } else { 724f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert int[] codes = new int[reorderCodes.size()]; 725f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert int j = 0; 726f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert for(Integer code : reorderCodes) { codes[j++] = code; } 727f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert settings.setReordering(baseData, codes); 7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String[] gSpecialReorderCodes = { 7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "space", "punct", "symbol", "currency", "digit" 7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Gets a script or reorder code from its string representation. 7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the script/reorder code, or 7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * -1 if not recognized 7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int getReorderCode(String word) { 7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int i = 0; i < gSpecialReorderCodes.length; ++i) { 7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(word.equalsIgnoreCase(gSpecialReorderCodes[i])) { 7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collator.ReorderCodes.FIRST + i; 7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word); 7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(script >= 0) { 7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return script; 7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (IllegalIcuArgumentException e) { 7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // fall through 7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(word.equalsIgnoreCase("others")) { 7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collator.ReorderCodes.OTHERS; // same as Zzzz = USCRIPT_UNKNOWN 7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return -1; 7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int getOnOffValue(String s) { 7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(s.equals("on")) { 7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCOL_ON; 7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(s.equals("off")) { 7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCOL_OFF; 7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCOL_DEFAULT; 7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int parseUnicodeSet(int i, UnicodeSet set) throws ParseException { 7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Collect a UnicodeSet pattern between a balanced pair of [brackets]. 7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int level = 0; 7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int j = i; 7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(j == rules.length()) { 7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("unbalanced UnicodeSet pattern brackets"); 7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return j; 7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = rules.charAt(j++); 7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c == 0x5b) { // '[' 7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++level; 7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(c == 0x5d) { // ']' 7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(--level == 0) { break; } 7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert set.applyPattern(rules.substring(i, j)); 7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(Exception e) { 7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("not a valid UnicodeSet pattern: " + e.getMessage()); 7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert j = skipWhiteSpace(j); 7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(j == rules.length() || rules.charAt(j) != 0x5d) { 7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setParseError("missing option-terminating ']' after UnicodeSet pattern"); 7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return j; 7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ++j; 7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int readWords(int i, StringBuilder raw) { 8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.setLength(0); 8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = skipWhiteSpace(i); 8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i >= rules.length()) { return 0; } 8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = rules.charAt(i); 8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) { // syntax except -_ 8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.length() == 0) { return i; } 8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int lastIndex = raw.length() - 1; 8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(raw.charAt(lastIndex) == ' ') { // remove trailing space 8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.setLength(lastIndex); 8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(PatternProps.isWhiteSpace(c)) { 8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.append(' '); 8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = skipWhiteSpace(i + 1); 8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert raw.append(c); 8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int skipComment(int i) { 8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // skip to past the newline 8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(i < rules.length()) { 8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = rules.charAt(i++); 8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // LF or FF or CR or NEL or LS or PS 8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) { 8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS." 8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NLF (new line function) = CR or LF or CR+LF or NEL. 8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No need to collect all of CR+LF because a following LF will be ignored anyway. 8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void setParseError(String reason) throws ParseException { 8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw makeParseException(reason); 8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void setParseError(String reason, Exception e) throws ParseException { 8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ParseException newExc = makeParseException(reason + ": " + e.getMessage()); 8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newExc.initCause(e); 8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw newExc; 8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ParseException makeParseException(String reason) { 8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new ParseException(appendErrorContext(reason), ruleIndex); 8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int U_PARSE_CONTEXT_LEN = 16; 8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // C++ setErrorContext() 8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private String appendErrorContext(String reason) { 8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Note: This relies on the calling code maintaining the ruleIndex 8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // at a position that is useful for debugging. 8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // For example, at the beginning of a reset or relation etc. 8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder msg = new StringBuilder(reason); 8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg.append(" at index ").append(ruleIndex); 8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We are not counting line numbers. 8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg.append(" near \""); 8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // before ruleIndex 8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1); 8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(start < 0) { 8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start = 0; 8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(start > 0 && Character.isLowSurrogate(rules.charAt(start))) { 8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++start; 8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg.append(rules, start, ruleIndex); 8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg.append('!'); 8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // starting from ruleIndex 8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length = rules.length() - ruleIndex; 8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(length >= U_PARSE_CONTEXT_LEN) { 8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length = U_PARSE_CONTEXT_LEN - 1; 8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(Character.isHighSurrogate(rules.charAt(ruleIndex + length - 1))) { 8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --length; 8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg.append(rules, ruleIndex, ruleIndex + length); 8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return msg.append('\"').toString(); 8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ASCII [:P:] and [:S:]: 8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E] 8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static boolean isSyntaxChar(int c) { 8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0x21 <= c && c <= 0x7e && 8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (c <= 0x2f || (0x3a <= c && c <= 0x40) || 8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (0x5b <= c && c <= 0x60) || (0x7b <= c)); 8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int skipWhiteSpace(int i) { 8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(i < rules.length() && PatternProps.isWhiteSpace(rules.charAt(i))) { 8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++i; 8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return i; 9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Normalizer2 nfd = Normalizer2.getNFDInstance(); 9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Normalizer2 nfc = Normalizer2.getNFCInstance(); 9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private String rules; 9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final CollationData baseData; 9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private CollationSettings settings; 9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Sink sink; 9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Importer importer; 9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int ruleIndex; 9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 915