12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 2013-2015, International Business Machines
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Corporation and others.  All Rights Reserved.
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* CollationRuleParser.java, ported from collationruleparser.h/.cpp
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C++ version created on: 2013apr10
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* created by: Markus W. Scherer
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl.coll;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParseException;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.IllegalIcuArgumentException;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.PatternProps;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UProperty;
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Collator;
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Normalizer2;
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16;
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet;
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale;
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
301537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/**
311537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android
32836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class CollationRuleParser {
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Special reset positions. */
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    enum Position {
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_TERTIARY_IGNORABLE,
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_TERTIARY_IGNORABLE,
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_SECONDARY_IGNORABLE,
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_SECONDARY_IGNORABLE,
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_PRIMARY_IGNORABLE,
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_PRIMARY_IGNORABLE,
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_VARIABLE,
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_VARIABLE,
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_REGULAR,
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_REGULAR,
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_IMPLICIT,
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_IMPLICIT,
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        FIRST_TRAILING,
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        LAST_TRAILING
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final Position[] POSITION_VALUES = Position.values();
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * First character of contractions that encode special reset positions.
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * U+FFFE cannot be tailored via rule syntax.
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The second contraction character is POS_BASE + Position.
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final char POS_LEAD = 0xfffe;
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Base for the second character of contractions that encode special reset positions.
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Braille characters U+28xx are printable and normalization-inert.
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see POS_LEAD
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final char POS_BASE = 0x2800;
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static abstract class Sink {
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Adds a reset.
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * strength=UCOL_IDENTICAL for &str.
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3.
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        abstract void addReset(int strength, CharSequence str);
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Adds a relation with strength and prefix | str / extension.
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        abstract void addRelation(int strength, CharSequence prefix,
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                CharSequence str, CharSequence extension);
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        void suppressContractions(UnicodeSet set) {}
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        void optimize(UnicodeSet set) {}
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    interface Importer {
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String getRules(String localeID, String collationType);
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Constructor.
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The Sink must be set before parsing.
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The Importer can be set, otherwise [import locale] syntax is not supported.
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    CollationRuleParser(CollationData base) {
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        baseData = base;
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets the pointer to a Sink object.
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The pointer is aliased: Pointer copy without cloning or taking ownership.
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void setSink(Sink sinkAlias) {
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        sink = sinkAlias;
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets the pointer to an Importer object.
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The pointer is aliased: Pointer copy without cloning or taking ownership.
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void setImporter(Importer importerAlias) {
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        importer = importerAlias;
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void parse(String ruleString, CollationSettings outSettings) throws ParseException {
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        settings = outSettings;
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        parse(ruleString);
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int UCOL_DEFAULT = -1;
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int UCOL_OFF = 0;
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int UCOL_ON = 1;
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int STRENGTH_MASK = 0xf;
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int STARRED_FLAG = 0x10;
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int OFFSET_SHIFT = 8;
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String BEFORE = "[before";
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // In C++, we parse into temporary UnicodeString objects named "raw" or "str".
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // In Java, we reuse this StringBuilder.
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final StringBuilder rawBuilder = new StringBuilder();
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void parse(String ruleString) throws ParseException {
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        rules = ruleString;
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ruleIndex = 0;
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(ruleIndex < rules.length()) {
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = rules.charAt(ruleIndex);
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(PatternProps.isWhiteSpace(c)) {
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++ruleIndex;
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch(c) {
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0x26:  // '&'
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parseRuleChain();
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0x5b:  // '['
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parseSetting();
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0x23:  // '#' starts a comment, until the end of the line
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ruleIndex = skipComment(ruleIndex + 1);
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0x40:  // '@' is equivalent to [backwards 2]
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++ruleIndex;
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0x21:  // '!' used to turn on Thai/Lao character reversal
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Accept but ignore. The root collator has contractions
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // that are equivalent to the character reversal, where appropriate.
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++ruleIndex;
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            default:
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("expected a reset or setting or comment");
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void parseRuleChain() throws ParseException {
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int resetStrength = parseResetAndPosition();
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean isFirstRelation = true;
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int result = parseRelationOperator();
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(result < 0) {
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(ruleIndex < rules.length() && rules.charAt(ruleIndex) == 0x23) {
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // '#' starts a comment, until the end of the line
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = skipComment(ruleIndex + 1);
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isFirstRelation) {
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("reset not followed by a relation");
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int strength = result & STRENGTH_MASK;
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(resetStrength < Collator.IDENTICAL) {
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // reset-before rule chain
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isFirstRelation) {
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(strength != resetStrength) {
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setParseError("reset-before strength differs from its first relation");
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return;
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(strength < resetStrength) {
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setParseError("reset-before strength followed by a stronger relation");
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return;
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int i = ruleIndex + (result >> OFFSET_SHIFT);  // skip over the relation operator
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((result & STARRED_FLAG) == 0) {
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parseRelationStrings(strength, i);
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parseStarredCharacters(strength, i);
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            isFirstRelation = false;
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int parseResetAndPosition() throws ParseException {
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = skipWhiteSpace(ruleIndex + 1);
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int j;
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char c;
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int resetStrength;
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(rules.regionMatches(i, BEFORE, 0, BEFORE.length()) &&
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (j = i + BEFORE.length()) < rules.length() &&
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                PatternProps.isWhiteSpace(rules.charAt(j)) &&
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ((j = skipWhiteSpace(j + 1)) + 1) < rules.length() &&
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                0x31 <= (c = rules.charAt(j)) && c <= 0x33 &&
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                rules.charAt(j + 1) == 0x5d) {
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // &[before n] with n=1 or 2 or 3
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            resetStrength = Collator.PRIMARY + (c - 0x31);
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = skipWhiteSpace(j + 2);
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            resetStrength = Collator.IDENTICAL;
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(i >= rules.length()) {
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("reset without position");
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCOL_DEFAULT;
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(rules.charAt(i) == 0x5b) {  // '['
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = parseSpecialPosition(i, rawBuilder);
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = parseTailoringString(i, rawBuilder);
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            sink.addReset(resetStrength, rawBuilder);
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch(Exception e) {
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("adding reset failed", e);
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCOL_DEFAULT;
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ruleIndex = i;
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return resetStrength;
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int parseRelationOperator() {
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ruleIndex = skipWhiteSpace(ruleIndex);
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(ruleIndex >= rules.length()) { return UCOL_DEFAULT; }
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int strength;
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = ruleIndex;
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char c = rules.charAt(i++);
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        switch(c) {
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 0x3c:  // '<'
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(i < rules.length() && rules.charAt(i) == 0x3c) {  // <<
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(i < rules.length() && rules.charAt(i) == 0x3c) {  // <<<
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++i;
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(i < rules.length() && rules.charAt(i) == 0x3c) {  // <<<<
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        ++i;
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        strength = Collator.QUATERNARY;
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        strength = Collator.TERTIARY;
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    strength = Collator.SECONDARY;
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strength = Collator.PRIMARY;
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(i < rules.length() && rules.charAt(i) == 0x2a) {  // '*'
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strength |= STARRED_FLAG;
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 0x3b:  // ';' same as <<
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = Collator.SECONDARY;
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 0x2c:  // ',' same as <<<
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = Collator.TERTIARY;
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 0x3d:  // '='
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = Collator.IDENTICAL;
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(i < rules.length() && rules.charAt(i) == 0x2a) {  // '*'
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strength |= STARRED_FLAG;
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        default:
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCOL_DEFAULT;
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((i - ruleIndex) << OFFSET_SHIFT) | strength;
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void parseRelationStrings(int strength, int i) throws ParseException {
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Parse
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //     prefix | str / extension
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // where prefix and extension are optional.
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String prefix = "";
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CharSequence extension = "";
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        i = parseTailoringString(i, rawBuilder);
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char next = (i < rules.length()) ? rules.charAt(i) : 0;
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(next == 0x7c) {  // '|' separates the context prefix from the string.
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prefix = rawBuilder.toString();
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = parseTailoringString(i + 1, rawBuilder);
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            next = (i < rules.length()) ? rules.charAt(i) : 0;
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // str = rawBuilder (do not modify rawBuilder any more in this function)
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(next == 0x2f) {  // '/' separates the string from the extension.
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuilder extBuilder = new StringBuilder();
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = parseTailoringString(i + 1, extBuilder);
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            extension = extBuilder;
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(prefix.length() != 0) {
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int prefix0 = prefix.codePointAt(0);
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = rawBuilder.codePointAt(0);
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) {
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary");
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            sink.addRelation(strength, prefix, rawBuilder, extension);
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch(Exception e) {
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("adding relation failed", e);
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ruleIndex = i;
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void parseStarredCharacters(int strength, int i) throws ParseException {
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String empty = "";
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        i = parseString(skipWhiteSpace(i), rawBuilder);
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(rawBuilder.length() == 0) {
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("missing starred-relation string");
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prev = -1;
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int j = 0;
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(j < rawBuilder.length()) {
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c = rawBuilder.codePointAt(j);
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(!nfd.isInert(c)) {
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("starred-relation string is not all NFD-inert");
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sink.addRelation(strength, empty, UTF16.valueOf(c), empty);
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(Exception e) {
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("adding relation failed", e);
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                j += Character.charCount(c);
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prev = c;
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(i >= rules.length() || rules.charAt(i) != 0x2d) {  // '-'
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(prev < 0) {
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("range without start in starred-relation string");
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = parseString(i + 1, rawBuilder);
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(rawBuilder.length() == 0) {
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("range without end in starred-relation string");
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = rawBuilder.codePointAt(0);
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c < prev) {
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("range start greater than end in starred-relation string");
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // range prev-c
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(++prev <= c) {
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(!nfd.isInert(prev)) {
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("starred-relation string range is not all NFD-inert");
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isSurrogate(prev)) {
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("starred-relation string range contains a surrogate");
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(0xfffd <= prev && prev <= 0xffff) {
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF");
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sink.addRelation(strength, empty, UTF16.valueOf(prev), empty);
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(Exception e) {
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("adding relation failed", e);
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prev = -1;
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            j = Character.charCount(c);
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ruleIndex = skipWhiteSpace(i);
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int parseTailoringString(int i, StringBuilder raw) throws ParseException {
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        i = parseString(skipWhiteSpace(i), raw);
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(raw.length() == 0) {
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("missing relation string");
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return skipWhiteSpace(i);
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int parseString(int i, StringBuilder raw) throws ParseException {
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        raw.setLength(0);
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(i < rules.length()) {
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = rules.charAt(i++);
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isSyntaxChar(c)) {
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c == 0x27) {  // apostrophe
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(i < rules.length() && rules.charAt(i) == 0x27) {
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Double apostrophe, encodes a single one.
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        raw.append((char)0x27);
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        ++i;
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        continue;
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Quote literal text until the next single apostrophe.
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for(;;) {
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(i == rules.length()) {
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            setParseError("quoted literal text missing terminating apostrophe");
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return i;
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c = rules.charAt(i++);
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(c == 0x27) {
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(i < rules.length() && rules.charAt(i) == 0x27) {
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // Double apostrophe inside quoted literal text,
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // still encodes a single apostrophe.
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                ++i;
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            } else {
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                break;
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        raw.append(c);
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(c == 0x5c) {  // backslash
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(i == rules.length()) {
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setParseError("backslash escape at the end of the rule string");
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return i;
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int cp = rules.codePointAt(i);
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    raw.appendCodePoint(cp);
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    i += Character.charCount(cp);
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Any other syntax character terminates a string.
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --i;
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(PatternProps.isWhiteSpace(c)) {
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Unquoted white space terminates a string.
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --i;
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                raw.append(c);
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int j = 0; j < raw.length();) {
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = raw.codePointAt(j);
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isSurrogate(c)) {
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("string contains an unpaired surrogate");
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(0xfffd <= c && c <= 0xffff) {
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("string contains U+FFFD, U+FFFE or U+FFFF");
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            j += Character.charCount(c);
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return i;
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Widen UTF16.isSurrogate(char16) to take an int.
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final boolean isSurrogate(int c) {
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (c & 0xfffff800) == 0xd800;
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String[] positions = {
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first tertiary ignorable",
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last tertiary ignorable",
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first secondary ignorable",
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last secondary ignorable",
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first primary ignorable",
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last primary ignorable",
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first variable",
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last variable",
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first regular",
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last regular",
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first implicit",
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last implicit",
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "first trailing",
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "last trailing"
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets str to a contraction of U+FFFE and (U+2800 + Position).
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return rule index after the special reset position
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @throws ParseException
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int parseSpecialPosition(int i, StringBuilder str) throws ParseException {
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int j = readWords(i + 1, rawBuilder);
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(j > i && rules.charAt(j) == 0x5d && rawBuilder.length() != 0) {  // words end with ]
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++j;
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String raw = rawBuilder.toString();
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.setLength(0);
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int pos = 0; pos < positions.length; ++pos) {
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(raw.equals(positions[pos])) {
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    str.append(POS_LEAD).append((char)(POS_BASE + pos));
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return j;
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(raw.equals("top")) {
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_REGULAR.ordinal()));
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return j;
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(raw.equals("variable top")) {
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_VARIABLE.ordinal()));
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return j;
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        setParseError("not a valid special reset position");
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return i;
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void parseSetting() throws ParseException {
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = ruleIndex + 1;
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int j = readWords(i, rawBuilder);
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(j <= i || rawBuilder.length() == 0) {
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("expected a setting/option at '['");
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // startsWith() etc. are available for String but not CharSequence/StringBuilder.
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String raw = rawBuilder.toString();
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(rules.charAt(j) == 0x5d) {  // words end with ]
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++j;
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(raw.startsWith("reorder") &&
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (raw.length() == 7 || raw.charAt(7) == 0x20)) {
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parseReordering(raw);
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ruleIndex = j;
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(raw.equals("backwards 2")) {
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ruleIndex = j;
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String v;
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int valueIndex = raw.lastIndexOf(0x20);
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(valueIndex >= 0) {
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                v = raw.substring(valueIndex + 1);
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                raw = raw.substring(0, valueIndex);
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                v = "";
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(raw.equals("strength") && v.length() == 1) {
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = UCOL_DEFAULT;
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char c = v.charAt(0);
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(0x31 <= c && c <= 0x34) {  // 1..4
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = Collator.PRIMARY + (c - 0x31);
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(c == 0x49) {  // 'I'
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = Collator.IDENTICAL;
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setStrength(value);
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("alternate")) {
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = UCOL_DEFAULT;
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(v.equals("non-ignorable")) {
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = 0;  // UCOL_NON_IGNORABLE
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(v.equals("shifted")) {
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = 1;  // UCOL_SHIFTED
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setAlternateHandlingShifted(value > 0);
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("maxVariable")) {
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = UCOL_DEFAULT;
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(v.equals("space")) {
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = CollationSettings.MAX_VAR_SPACE;
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(v.equals("punct")) {
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = CollationSettings.MAX_VAR_PUNCT;
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(v.equals("symbol")) {
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = CollationSettings.MAX_VAR_SYMBOL;
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(v.equals("currency")) {
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = CollationSettings.MAX_VAR_CURRENCY;
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setMaxVariable(value, 0);
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.variableTop = baseData.getLastPrimaryForGroup(
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        Collator.ReorderCodes.FIRST + value);
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    assert(settings.variableTop != 0);
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("caseFirst")) {
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = UCOL_DEFAULT;
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(v.equals("off")) {
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = UCOL_OFF;
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(v.equals("lower")) {
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = CollationSettings.CASE_FIRST;  // UCOL_LOWER_FIRST
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(v.equals("upper")) {
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    value = CollationSettings.CASE_FIRST_AND_UPPER_MASK;  // UCOL_UPPER_FIRST
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setCaseFirst(value);
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("caseLevel")) {
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = getOnOffValue(v);
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setFlag(CollationSettings.CASE_LEVEL, value > 0);
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("normalization")) {
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = getOnOffValue(v);
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setFlag(CollationSettings.CHECK_FCD, value > 0);
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("numericOrdering")) {
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = getOnOffValue(v);
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    settings.setFlag(CollationSettings.NUMERIC, value > 0);
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("hiraganaQ")) {
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int value = getOnOffValue(v);
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(value != UCOL_DEFAULT) {
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(value == UCOL_ON) {
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setParseError("[hiraganaQ on] is not supported");
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("import")) {
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // BCP 47 language tag -> ICU locale ID
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ULocale localeID;
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    localeID = new ULocale.Builder().setLanguageTag(v).build();
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(Exception e) {
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("expected language tag in [import langTag]", e);
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return;
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // localeID minus all keywords
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String baseID = localeID.getBaseName();
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // @collation=type, or length=0 if not specified
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String collationType = localeID.getKeywordValue("collation");
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(importer == null) {
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("[import langTag] is not supported");
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String importedRules;
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    try {
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        importedRules =
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            importer.getRules(baseID,
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    collationType != null ? collationType : "standard");
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } catch(Exception e) {
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setParseError("[import langTag] failed", e);
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return;
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String outerRules = rules;
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int outerRuleIndex = ruleIndex;
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    try {
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        parse(importedRules);
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } catch(Exception e) {
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        ruleIndex = outerRuleIndex;  // Restore the original index for error reporting.
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        setParseError("parsing imported rules failed", e);
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rules = outerRules;
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ruleIndex = j;
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(rules.charAt(j) == 0x5b) {  // words end with [
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet set = new UnicodeSet();
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            j = parseUnicodeSet(j, set);
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(raw.equals("optimize")) {
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sink.optimize(set);
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(Exception e) {
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("[optimize set] failed", e);
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ruleIndex = j;
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(raw.equals("suppressContractions")) {
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sink.suppressContractions(set);
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(Exception e) {
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setParseError("[suppressContractions set] failed", e);
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ruleIndex = j;
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        setParseError("not a valid setting/option");
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void parseReordering(CharSequence raw) throws ParseException {
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 7;  // after "reorder"
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(i == raw.length()) {
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // empty [reorder] with no codes
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            settings.resetReordering();
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Parse the codes in [reorder aa bb cc].
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ArrayList<Integer> reorderCodes = new ArrayList<Integer>();
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(i < raw.length()) {
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++i;  // skip the word-separating space
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int limit = i;
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(limit < raw.length() && raw.charAt(limit) != ' ') { ++limit; }
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String word = raw.subSequence(i, limit).toString();
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int code = getReorderCode(word);
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(code < 0) {
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("unknown script or reorder code");
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderCodes.add(code);
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = limit;
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(reorderCodes.isEmpty()) {
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            settings.resetReordering();
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] codes = new int[reorderCodes.size()];
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int j = 0;
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(Integer code : reorderCodes) { codes[j++] = code; }
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            settings.setReordering(baseData, codes);
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String[] gSpecialReorderCodes = {
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        "space", "punct", "symbol", "currency", "digit"
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets a script or reorder code from its string representation.
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the script/reorder code, or
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * -1 if not recognized
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int getReorderCode(String word) {
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i = 0; i < gSpecialReorderCodes.length; ++i) {
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(word.equalsIgnoreCase(gSpecialReorderCodes[i])) {
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return Collator.ReorderCodes.FIRST + i;
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word);
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(script >= 0) {
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return script;
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IllegalIcuArgumentException e) {
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // fall through
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(word.equalsIgnoreCase("others")) {
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return Collator.ReorderCodes.OTHERS;  // same as Zzzz = USCRIPT_UNKNOWN
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int getOnOffValue(String s) {
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(s.equals("on")) {
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCOL_ON;
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(s.equals("off")) {
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCOL_OFF;
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UCOL_DEFAULT;
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int parseUnicodeSet(int i, UnicodeSet set) throws ParseException {
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Collect a UnicodeSet pattern between a balanced pair of [brackets].
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int level = 0;
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int j = i;
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(j == rules.length()) {
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setParseError("unbalanced UnicodeSet pattern brackets");
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return j;
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = rules.charAt(j++);
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c == 0x5b) {  // '['
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++level;
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(c == 0x5d) {  // ']'
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(--level == 0) { break; }
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.applyPattern(rules.substring(i, j));
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch(Exception e) {
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("not a valid UnicodeSet pattern: " + e.getMessage());
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        j = skipWhiteSpace(j);
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(j == rules.length() || rules.charAt(j) != 0x5d) {
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setParseError("missing option-terminating ']' after UnicodeSet pattern");
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return j;
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ++j;
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int readWords(int i, StringBuilder raw) {
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        raw.setLength(0);
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        i = skipWhiteSpace(i);
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(i >= rules.length()) { return 0; }
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = rules.charAt(i);
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) {  // syntax except -_
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(raw.length() == 0) { return i; }
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int lastIndex = raw.length() - 1;
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(raw.charAt(lastIndex) == ' ') {  // remove trailing space
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    raw.setLength(lastIndex);
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(PatternProps.isWhiteSpace(c)) {
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                raw.append(' ');
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i = skipWhiteSpace(i + 1);
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                raw.append(c);
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int skipComment(int i) {
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // skip to past the newline
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(i < rules.length()) {
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = rules.charAt(i++);
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // LF or FF or CR or NEL or LS or PS
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) {
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS."
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // NLF (new line function) = CR or LF or CR+LF or NEL.
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // No need to collect all of CR+LF because a following LF will be ignored anyway.
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return i;
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void setParseError(String reason) throws ParseException {
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        throw makeParseException(reason);
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void setParseError(String reason, Exception e) throws ParseException {
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ParseException newExc = makeParseException(reason + ": " + e.getMessage());
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newExc.initCause(e);
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        throw newExc;
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private ParseException makeParseException(String reason) {
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return new ParseException(appendErrorContext(reason), ruleIndex);
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int U_PARSE_CONTEXT_LEN = 16;
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // C++ setErrorContext()
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String appendErrorContext(String reason) {
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: This relies on the calling code maintaining the ruleIndex
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // at a position that is useful for debugging.
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // For example, at the beginning of a reset or relation etc.
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder msg = new StringBuilder(reason);
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        msg.append(" at index ").append(ruleIndex);
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We are not counting line numbers.
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        msg.append(" near \"");
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // before ruleIndex
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1);
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(start < 0) {
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            start = 0;
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(start > 0 && Character.isLowSurrogate(rules.charAt(start))) {
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++start;
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        msg.append(rules, start, ruleIndex);
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        msg.append('!');
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // starting from ruleIndex
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int length = rules.length() - ruleIndex;
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(length >= U_PARSE_CONTEXT_LEN) {
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            length = U_PARSE_CONTEXT_LEN - 1;
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(Character.isHighSurrogate(rules.charAt(ruleIndex + length - 1))) {
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --length;
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        msg.append(rules, ruleIndex, ruleIndex + length);
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return msg.append('\"').toString();
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ASCII [:P:] and [:S:]:
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E]
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isSyntaxChar(int c) {
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return 0x21 <= c && c <= 0x7e &&
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (c <= 0x2f || (0x3a <= c && c <= 0x40) ||
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (0x5b <= c && c <= 0x60) || (0x7b <= c));
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int skipWhiteSpace(int i) {
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(i < rules.length() && PatternProps.isWhiteSpace(rules.charAt(i))) {
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++i;
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return i;
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // NFD and NFC normalizer instances.
    // NOTE(review): not used in this part of the file; presumably used when parsing strings -- confirm.
    private Normalizer2 nfd = Normalizer2.getNFDInstance();
    private Normalizer2 nfc = Normalizer2.getNFCInstance();

    // The rule string that the scanning methods (readWords, skipComment, skipWhiteSpace, ...) read from.
    private String rules;
    // Base collation data; passed along to settings.setReordering() for [reorder ...].
    private final CollationData baseData;
    // Settings object that receives parsed options such as the reordering.
    private CollationSettings settings;

    // Receiver of parsed rule events, e.g. optimize() and suppressContractions().
    private Sink sink;
    // NOTE(review): presumably resolves imported rules; its use is outside this part of the file -- confirm.
    private Importer importer;

    // Current position in the rules; used as the error offset and
    // as the center of the context excerpt in parse error messages.
    private int ruleIndex;
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
921