/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* CollationRuleParser.java, ported from collationruleparser.h/.cpp
*
* C++ version created on: 2013apr10
* created by: Markus W. Scherer
*/
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.ParseException;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.IllegalIcuArgumentException;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.PatternProps;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer2;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class CollationRuleParser {
/**
 * Special reset positions.
 *
 * The declaration order matters: this file documents the second character of the
 * contraction that encodes a position as POS_BASE + Position (presumably via the
 * ordinal), so constants must not be reordered or inserted in the middle.
 */
enum Position {
    FIRST_TERTIARY_IGNORABLE,
    LAST_TERTIARY_IGNORABLE,
    FIRST_SECONDARY_IGNORABLE,
    LAST_SECONDARY_IGNORABLE,
    FIRST_PRIMARY_IGNORABLE,
    LAST_PRIMARY_IGNORABLE,
    FIRST_VARIABLE,
    LAST_VARIABLE,
    FIRST_REGULAR,
    LAST_REGULAR,
    FIRST_IMPLICIT,
    LAST_IMPLICIT,
    FIRST_TRAILING,
    LAST_TRAILING
}
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final Position[] POSITION_VALUES = Position.values();
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * First character of contractions that encode special reset positions.
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * U+FFFE cannot be tailored via rule syntax.
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The second contraction character is POS_BASE + Position.
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final char POS_LEAD = 0xfffe;
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Base for the second character of contractions that encode special reset positions.
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Braille characters U+28xx are printable and normalization-inert.
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see POS_LEAD
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final char POS_BASE = 0x2800;
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static abstract class Sink {
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Adds a reset.
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * strength=UCOL_IDENTICAL for &str.
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract void addReset(int strength, CharSequence str);
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Adds a relation with strength and prefix | str / extension.
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract void addRelation(int strength, CharSequence prefix,
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                CharSequence str, CharSequence extension);
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        void suppressContractions(UnicodeSet set) {}
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        void optimize(UnicodeSet set) {}
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Source of rule strings for other tailorings, used for the [import locale] rule syntax.
 */
interface Importer {
    /**
     * Returns the rule string for the requested tailoring.
     *
     * @param localeID identifies the locale whose rules are requested
     * @param collationType identifies the collation type within that locale
     * @return the rule string
     */
    String getRules(String localeID, String collationType);
}
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Constructor.
 * The Sink must be set before parsing.
 * The Importer can be set, otherwise [import locale] syntax is not supported.
 *
 * @param base the base collation data; the reference is stored as-is, not copied
 */
CollationRuleParser(CollationData base) {
    baseData = base;
}
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the pointer to a Sink object.
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The pointer is aliased: Pointer copy without cloning or taking ownership.
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void setSink(Sink sinkAlias) {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sink = sinkAlias;
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the pointer to an Importer object.
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The pointer is aliased: Pointer copy without cloning or taking ownership.
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void setImporter(Importer importerAlias) {
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        importer = importerAlias;
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void parse(String ruleString, CollationSettings outSettings) throws ParseException {
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        settings = outSettings;
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        parse(ruleString);
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int UCOL_DEFAULT = -1;
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int UCOL_OFF = 0;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int UCOL_ON = 1;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int STRENGTH_MASK = 0xf;
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int STARRED_FLAG = 0x10;
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int OFFSET_SHIFT = 8;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String BEFORE = "[before";
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // In C++, we parse into temporary UnicodeString objects named "raw" or "str".
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // In Java, we reuse this StringBuilder.
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final StringBuilder rawBuilder = new StringBuilder();
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void parse(String ruleString) throws ParseException {
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rules = ruleString;
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleIndex = 0;
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(ruleIndex < rules.length()) {
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = rules.charAt(ruleIndex);
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(PatternProps.isWhiteSpace(c)) {
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++ruleIndex;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            switch(c) {
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x26:  // '&'
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                parseRuleChain();
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x5b:  // '['
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                parseSetting();
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x23:  // '#' starts a comment, until the end of the line
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ruleIndex = skipComment(ruleIndex + 1);
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x40:  // '@' is equivalent to [backwards 2]
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++ruleIndex;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x21:  // '!' used to turn on Thai/Lao character reversal
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Accept but ignore. The root collator has contractions
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // that are equivalent to the character reversal, where appropriate.
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++ruleIndex;
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            default:
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("expected a reset or setting or comment");
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void parseRuleChain() throws ParseException {
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int resetStrength = parseResetAndPosition();
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isFirstRelation = true;
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int result = parseRelationOperator();
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(result < 0) {
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(ruleIndex < rules.length() && rules.charAt(ruleIndex) == 0x23) {
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // '#' starts a comment, until the end of the line
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = skipComment(ruleIndex + 1);
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isFirstRelation) {
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("reset not followed by a relation");
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int strength = result & STRENGTH_MASK;
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(resetStrength < Collator.IDENTICAL) {
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // reset-before rule chain
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isFirstRelation) {
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(strength != resetStrength) {
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        setParseError("reset-before strength differs from its first relation");
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return;
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(strength < resetStrength) {
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        setParseError("reset-before strength followed by a stronger relation");
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return;
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = ruleIndex + (result >> OFFSET_SHIFT);  // skip over the relation operator
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((result & STARRED_FLAG) == 0) {
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                parseRelationStrings(strength, i);
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                parseStarredCharacters(strength, i);
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isFirstRelation = false;
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int parseResetAndPosition() throws ParseException {
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i = skipWhiteSpace(ruleIndex + 1);
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int j;
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int resetStrength;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(rules.regionMatches(i, BEFORE, 0, BEFORE.length()) &&
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (j = i + BEFORE.length()) < rules.length() &&
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                PatternProps.isWhiteSpace(rules.charAt(j)) &&
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ((j = skipWhiteSpace(j + 1)) + 1) < rules.length() &&
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                0x31 <= (c = rules.charAt(j)) && c <= 0x33 &&
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                rules.charAt(j + 1) == 0x5d) {
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // &[before n] with n=1 or 2 or 3
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            resetStrength = Collator.PRIMARY + (c - 0x31);
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = skipWhiteSpace(j + 2);
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            resetStrength = Collator.IDENTICAL;
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(i >= rules.length()) {
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("reset without position");
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UCOL_DEFAULT;
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(rules.charAt(i) == 0x5b) {  // '['
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = parseSpecialPosition(i, rawBuilder);
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = parseTailoringString(i, rawBuilder);
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sink.addReset(resetStrength, rawBuilder);
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch(Exception e) {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("adding reset failed", e);
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UCOL_DEFAULT;
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleIndex = i;
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return resetStrength;
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int parseRelationOperator() {
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleIndex = skipWhiteSpace(ruleIndex);
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(ruleIndex >= rules.length()) { return UCOL_DEFAULT; }
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int strength;
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i = ruleIndex;
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c = rules.charAt(i++);
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        switch(c) {
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x3c:  // '<'
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i < rules.length() && rules.charAt(i) == 0x3c) {  // <<
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++i;
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(i < rules.length() && rules.charAt(i) == 0x3c) {  // <<<
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++i;
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(i < rules.length() && rules.charAt(i) == 0x3c) {  // <<<<
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++i;
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        strength = Collator.QUATERNARY;
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        strength = Collator.TERTIARY;
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    strength = Collator.SECONDARY;
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                strength = Collator.PRIMARY;
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i < rules.length() && rules.charAt(i) == 0x2a) {  // '*'
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++i;
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                strength |= STARRED_FLAG;
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x3b:  // ';' same as <<
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = Collator.SECONDARY;
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x2c:  // ',' same as <<<
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = Collator.TERTIARY;
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case 0x3d:  // '='
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = Collator.IDENTICAL;
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i < rules.length() && rules.charAt(i) == 0x2a) {  // '*'
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++i;
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                strength |= STARRED_FLAG;
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        default:
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UCOL_DEFAULT;
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((i - ruleIndex) << OFFSET_SHIFT) | strength;
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void parseRelationStrings(int strength, int i) throws ParseException {
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Parse
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //     prefix | str / extension
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // where prefix and extension are optional.
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String prefix = "";
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharSequence extension = "";
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        i = parseTailoringString(i, rawBuilder);
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char next = (i < rules.length()) ? rules.charAt(i) : 0;
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(next == 0x7c) {  // '|' separates the context prefix from the string.
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prefix = rawBuilder.toString();
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = parseTailoringString(i + 1, rawBuilder);
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            next = (i < rules.length()) ? rules.charAt(i) : 0;
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // str = rawBuilder (do not modify rawBuilder any more in this function)
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(next == 0x2f) {  // '/' separates the string from the extension.
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder extBuilder = new StringBuilder();
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = parseTailoringString(i + 1, extBuilder);
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            extension = extBuilder;
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(prefix.length() != 0) {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int prefix0 = prefix.codePointAt(0);
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = rawBuilder.codePointAt(0);
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) {
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary");
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sink.addRelation(strength, prefix, rawBuilder, extension);
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch(Exception e) {
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("adding relation failed", e);
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleIndex = i;
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void parseStarredCharacters(int strength, int i) throws ParseException {
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String empty = "";
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        i = parseString(skipWhiteSpace(i), rawBuilder);
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(rawBuilder.length() == 0) {
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("missing starred-relation string");
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prev = -1;
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int j = 0;
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(j < rawBuilder.length()) {
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c = rawBuilder.codePointAt(j);
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!nfd.isInert(c)) {
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("starred-relation string is not all NFD-inert");
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sink.addRelation(strength, empty, UTF16.valueOf(c), empty);
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(Exception e) {
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("adding relation failed", e);
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                j += Character.charCount(c);
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prev = c;
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i >= rules.length() || rules.charAt(i) != 0x2d) {  // '-'
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(prev < 0) {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("range without start in starred-relation string");
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = parseString(i + 1, rawBuilder);
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(rawBuilder.length() == 0) {
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("range without end in starred-relation string");
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = rawBuilder.codePointAt(0);
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c < prev) {
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("range start greater than end in starred-relation string");
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // range prev-c
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(++prev <= c) {
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!nfd.isInert(prev)) {
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("starred-relation string range is not all NFD-inert");
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isSurrogate(prev)) {
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("starred-relation string range contains a surrogate");
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(0xfffd <= prev && prev <= 0xffff) {
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF");
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sink.addRelation(strength, empty, UTF16.valueOf(prev), empty);
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(Exception e) {
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("adding relation failed", e);
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prev = -1;
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            j = Character.charCount(c);
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleIndex = skipWhiteSpace(i);
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int parseTailoringString(int i, StringBuilder raw) throws ParseException {
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        i = parseString(skipWhiteSpace(i), raw);
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(raw.length() == 0) {
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("missing relation string");
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return skipWhiteSpace(i);
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int parseString(int i, StringBuilder raw) throws ParseException {
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        raw.setLength(0);
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(i < rules.length()) {
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = rules.charAt(i++);
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isSyntaxChar(c)) {
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c == 0x27) {  // apostrophe
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(i < rules.length() && rules.charAt(i) == 0x27) {
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Double apostrophe, encodes a single one.
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        raw.append((char)0x27);
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++i;
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        continue;
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Quote literal text until the next single apostrophe.
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for(;;) {
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(i == rules.length()) {
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            setParseError("quoted literal text missing terminating apostrophe");
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            return i;
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c = rules.charAt(i++);
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(c == 0x27) {
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(i < rules.length() && rules.charAt(i) == 0x27) {
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // Double apostrophe inside quoted literal text,
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // still encodes a single apostrophe.
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                ++i;
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            } else {
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                break;
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        raw.append(c);
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c == 0x5c) {  // backslash
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(i == rules.length()) {
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        setParseError("backslash escape at the end of the rule string");
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return i;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int cp = rules.codePointAt(i);
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    raw.appendCodePoint(cp);
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    i += Character.charCount(cp);
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Any other syntax character terminates a string.
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    --i;
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(PatternProps.isWhiteSpace(c)) {
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Unquoted white space terminates a string.
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --i;
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                raw.append(c);
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int j = 0; j < raw.length();) {
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = raw.codePointAt(j);
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isSurrogate(c)) {
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("string contains an unpaired surrogate");
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return i;
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(0xfffd <= c && c <= 0xffff) {
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("string contains U+FFFD, U+FFFE or U+FFFF");
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return i;
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            j += Character.charCount(c);
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return i;
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: Widen UTF16.isSurrogate(char16) to take an int.
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean isSurrogate(int c) {
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (c & 0xfffff800) == 0xd800;
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String[] positions = {
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first tertiary ignorable",
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last tertiary ignorable",
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first secondary ignorable",
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last secondary ignorable",
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first primary ignorable",
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last primary ignorable",
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first variable",
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last variable",
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first regular",
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last regular",
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first implicit",
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last implicit",
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "first trailing",
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "last trailing"
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets str to a contraction of U+FFFE and (U+2800 + Position).
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return rule index after the special reset position
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws ParseException
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int parseSpecialPosition(int i, StringBuilder str) throws ParseException {
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int j = readWords(i + 1, rawBuilder);
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(j > i && rules.charAt(j) == 0x5d && rawBuilder.length() != 0) {  // words end with ]
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++j;
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String raw = rawBuilder.toString();
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            str.setLength(0);
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(int pos = 0; pos < positions.length; ++pos) {
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(raw.equals(positions[pos])) {
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    str.append(POS_LEAD).append((char)(POS_BASE + pos));
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return j;
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(raw.equals("top")) {
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_REGULAR.ordinal()));
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return j;
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(raw.equals("variable top")) {
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(POS_LEAD).append((char)(POS_BASE + Position.LAST_VARIABLE.ordinal()));
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return j;
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setParseError("not a valid special reset position");
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return i;
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void parseSetting() throws ParseException {
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i = ruleIndex + 1;
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int j = readWords(i, rawBuilder);
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(j <= i || rawBuilder.length() == 0) {
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("expected a setting/option at '['");
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // startsWith() etc. are available for String but not CharSequence/StringBuilder.
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String raw = rawBuilder.toString();
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(rules.charAt(j) == 0x5d) {  // words end with ]
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++j;
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(raw.startsWith("reorder") &&
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (raw.length() == 7 || raw.charAt(7) == 0x20)) {
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                parseReordering(raw);
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ruleIndex = j;
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(raw.equals("backwards 2")) {
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                settings.setFlag(CollationSettings.BACKWARD_SECONDARY, true);
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ruleIndex = j;
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String v;
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int valueIndex = raw.lastIndexOf(0x20);
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(valueIndex >= 0) {
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                v = raw.substring(valueIndex + 1);
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                raw = raw.substring(0, valueIndex);
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                v = "";
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(raw.equals("strength") && v.length() == 1) {
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = UCOL_DEFAULT;
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c = v.charAt(0);
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(0x31 <= c && c <= 0x34) {  // 1..4
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = Collator.PRIMARY + (c - 0x31);
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c == 0x49) {  // 'I'
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = Collator.IDENTICAL;
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setStrength(value);
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("alternate")) {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = UCOL_DEFAULT;
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(v.equals("non-ignorable")) {
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = 0;  // UCOL_NON_IGNORABLE
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(v.equals("shifted")) {
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = 1;  // UCOL_SHIFTED
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setAlternateHandlingShifted(value > 0);
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("maxVariable")) {
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = UCOL_DEFAULT;
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(v.equals("space")) {
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = CollationSettings.MAX_VAR_SPACE;
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(v.equals("punct")) {
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = CollationSettings.MAX_VAR_PUNCT;
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(v.equals("symbol")) {
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = CollationSettings.MAX_VAR_SYMBOL;
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(v.equals("currency")) {
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = CollationSettings.MAX_VAR_CURRENCY;
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setMaxVariable(value, 0);
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.variableTop = baseData.getLastPrimaryForGroup(
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        Collator.ReorderCodes.FIRST + value);
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    assert(settings.variableTop != 0);
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("caseFirst")) {
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = UCOL_DEFAULT;
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(v.equals("off")) {
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = UCOL_OFF;
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(v.equals("lower")) {
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = CollationSettings.CASE_FIRST;  // UCOL_LOWER_FIRST
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(v.equals("upper")) {
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    value = CollationSettings.CASE_FIRST_AND_UPPER_MASK;  // UCOL_UPPER_FIRST
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setCaseFirst(value);
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("caseLevel")) {
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = getOnOffValue(v);
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setFlag(CollationSettings.CASE_LEVEL, value > 0);
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("normalization")) {
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = getOnOffValue(v);
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setFlag(CollationSettings.CHECK_FCD, value > 0);
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("numericOrdering")) {
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = getOnOffValue(v);
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    settings.setFlag(CollationSettings.NUMERIC, value > 0);
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("hiraganaQ")) {
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int value = getOnOffValue(v);
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(value != UCOL_DEFAULT) {
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(value == UCOL_ON) {
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        setParseError("[hiraganaQ on] is not supported");
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("import")) {
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // BCP 47 language tag -> ICU locale ID
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ULocale localeID;
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    localeID = new ULocale.Builder().setLanguageTag(v).build();
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(Exception e) {
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("expected language tag in [import langTag]", e);
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return;
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // localeID minus all keywords
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String baseID = localeID.getBaseName();
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // @collation=type, or length=0 if not specified
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String collationType = localeID.getKeywordValue("collation");
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(importer == null) {
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("[import langTag] is not supported");
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    String importedRules;
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    try {
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        importedRules =
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            importer.getRules(baseID,
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    collationType != null ? collationType : "standard");
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } catch(Exception e) {
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        setParseError("[import langTag] failed", e);
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return;
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    String outerRules = rules;
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int outerRuleIndex = ruleIndex;
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    try {
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        parse(importedRules);
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } catch(Exception e) {
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ruleIndex = outerRuleIndex;  // Restore the original index for error reporting.
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        setParseError("parsing imported rules failed", e);
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    rules = outerRules;
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ruleIndex = j;
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(rules.charAt(j) == 0x5b) {  // words end with [
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UnicodeSet set = new UnicodeSet();
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            j = parseUnicodeSet(j, set);
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(raw.equals("optimize")) {
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sink.optimize(set);
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(Exception e) {
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("[optimize set] failed", e);
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ruleIndex = j;
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(raw.equals("suppressContractions")) {
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sink.suppressContractions(set);
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(Exception e) {
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setParseError("[suppressContractions set] failed", e);
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ruleIndex = j;
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setParseError("not a valid setting/option");
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void parseReordering(CharSequence raw) throws ParseException {
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i = 7;  // after "reorder"
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(i == raw.length()) {
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // empty [reorder] with no codes
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            settings.resetReordering();
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Parse the codes in [reorder aa bb cc].
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ArrayList<Integer> reorderCodes = new ArrayList<Integer>();
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(i < raw.length()) {
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++i;  // skip the word-separating space
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int limit = i;
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(limit < raw.length() && raw.charAt(limit) != ' ') { ++limit; }
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String word = raw.subSequence(i, limit).toString();
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int code = getReorderCode(word);
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(code < 0) {
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("unknown script or reorder code");
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            reorderCodes.add(code);
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = limit;
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
721f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(reorderCodes.isEmpty()) {
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            settings.resetReordering();
723f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else {
724f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int[] codes = new int[reorderCodes.size()];
725f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int j = 0;
726f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            for(Integer code : reorderCodes) { codes[j++] = code; }
727f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            settings.setReordering(baseData, codes);
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String[] gSpecialReorderCodes = {
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        "space", "punct", "symbol", "currency", "digit"
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets a script or reorder code from its string representation.
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the script/reorder code, or
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * -1 if not recognized
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int getReorderCode(String word) {
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i = 0; i < gSpecialReorderCodes.length; ++i) {
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(word.equalsIgnoreCase(gSpecialReorderCodes[i])) {
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return Collator.ReorderCodes.FIRST + i;
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word);
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(script >= 0) {
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return script;
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (IllegalIcuArgumentException e) {
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // fall through
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(word.equalsIgnoreCase("others")) {
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return Collator.ReorderCodes.OTHERS;  // same as Zzzz = USCRIPT_UNKNOWN
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return -1;
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int getOnOffValue(String s) {
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(s.equals("on")) {
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UCOL_ON;
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(s.equals("off")) {
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UCOL_OFF;
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UCOL_DEFAULT;
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int parseUnicodeSet(int i, UnicodeSet set) throws ParseException {
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Collect a UnicodeSet pattern between a balanced pair of [brackets].
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int level = 0;
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int j = i;
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(j == rules.length()) {
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setParseError("unbalanced UnicodeSet pattern brackets");
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return j;
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = rules.charAt(j++);
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c == 0x5b) {  // '['
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++level;
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(c == 0x5d) {  // ']'
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(--level == 0) { break; }
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.applyPattern(rules.substring(i, j));
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch(Exception e) {
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("not a valid UnicodeSet pattern: " + e.getMessage());
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        j = skipWhiteSpace(j);
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(j == rules.length() || rules.charAt(j) != 0x5d) {
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setParseError("missing option-terminating ']' after UnicodeSet pattern");
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return j;
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ++j;
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int readWords(int i, StringBuilder raw) {
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        raw.setLength(0);
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        i = skipWhiteSpace(i);
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i >= rules.length()) { return 0; }
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = rules.charAt(i);
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) {  // syntax except -_
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(raw.length() == 0) { return i; }
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int lastIndex = raw.length() - 1;
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(raw.charAt(lastIndex) == ' ') {  // remove trailing space
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    raw.setLength(lastIndex);
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return i;
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(PatternProps.isWhiteSpace(c)) {
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                raw.append(' ');
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                i = skipWhiteSpace(i + 1);
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                raw.append(c);
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++i;
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int skipComment(int i) {
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // skip to past the newline
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(i < rules.length()) {
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = rules.charAt(i++);
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // LF or FF or CR or NEL or LS or PS
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) {
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS."
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // NLF (new line function) = CR or LF or CR+LF or NEL.
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // No need to collect all of CR+LF because a following LF will be ignored anyway.
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return i;
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void setParseError(String reason) throws ParseException {
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throw makeParseException(reason);
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void setParseError(String reason, Exception e) throws ParseException {
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ParseException newExc = makeParseException(reason + ": " + e.getMessage());
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newExc.initCause(e);
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throw newExc;
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private ParseException makeParseException(String reason) {
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return new ParseException(appendErrorContext(reason), ruleIndex);
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int U_PARSE_CONTEXT_LEN = 16;
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // C++ setErrorContext()
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String appendErrorContext(String reason) {
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: This relies on the calling code maintaining the ruleIndex
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // at a position that is useful for debugging.
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // For example, at the beginning of a reset or relation etc.
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder msg = new StringBuilder(reason);
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        msg.append(" at index ").append(ruleIndex);
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We are not counting line numbers.
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        msg.append(" near \"");
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // before ruleIndex
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1);
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(start < 0) {
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            start = 0;
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(start > 0 && Character.isLowSurrogate(rules.charAt(start))) {
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++start;
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        msg.append(rules, start, ruleIndex);
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        msg.append('!');
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // starting from ruleIndex
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int length = rules.length() - ruleIndex;
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(length >= U_PARSE_CONTEXT_LEN) {
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            length = U_PARSE_CONTEXT_LEN - 1;
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(Character.isHighSurrogate(rules.charAt(ruleIndex + length - 1))) {
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --length;
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        msg.append(rules, ruleIndex, ruleIndex + length);
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return msg.append('\"').toString();
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ASCII [:P:] and [:S:]:
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E]
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isSyntaxChar(int c) {
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0x21 <= c && c <= 0x7e &&
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (c <= 0x2f || (0x3a <= c && c <= 0x40) ||
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (0x5b <= c && c <= 0x60) || (0x7b <= c));
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int skipWhiteSpace(int i) {
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(i < rules.length() && PatternProps.isWhiteSpace(rules.charAt(i))) {
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++i;
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return i;
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Normalizer2 nfd = Normalizer2.getNFDInstance();
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Normalizer2 nfc = Normalizer2.getNFCInstance();
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String rules;
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final CollationData baseData;
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationSettings settings;
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Sink sink;
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Importer importer;
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int ruleIndex;
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
915