// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import com.ibm.icu.impl.UtilityExtensions;

167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the transliteration in one direction from one set of characters or short
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * two such sets, one for the forward direction, and one for the reverse.
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * finding a matching rule at a given point in the text.  This is accomplished
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * by the <code>findMatch()</code> method.
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Alan Liu
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass TransliterationRuleSet {
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Vector of rules, in the order added.
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private List<TransliterationRule> ruleVector;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Length of the longest preceding context
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int maxContextLength;
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sorted and indexed table of rules.  This is created by freeze() from
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the rules in ruleVector.  rules.length >= ruleVector.size(), and the
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * references in rules[] are aliases of the references in ruleVector.
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A single rule in ruleVector is listed one or more times in rules[].
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private TransliterationRule[] rules;
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Index table.  For text having a first character c, compute x = c&0xFF.
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * freeze().
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int[] index;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Construct a new empty rule set.
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public TransliterationRuleSet() {
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleVector = new ArrayList<TransliterationRule>();
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        maxContextLength = 0;
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the maximum context length.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the length of the longest preceding context.
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getMaximumContextLength() {
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return maxContextLength;
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Add a rule to this set.  Rules are added in order, and order is
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * significant.
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param rule the rule to add
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addRule(TransliterationRule rule) {
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ruleVector.add(rule);
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int len;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((len = rule.getAnteContextLength()) > maxContextLength) {
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            maxContextLength = len;
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rules = null;
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Close this rule set to further additions, check it for masked rules,
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and index it to optimize performance.
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IllegalArgumentException if some rules are masked
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void freeze() {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Construct the rule array and index table.  We reorder the
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * rules by sorting them into 256 bins.  Each bin contains all
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * rules matching the index value for that bin.  A rule
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * matches an index value if string whose first key character
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * has a low byte equal to the index value can match the rule.
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Each bin contains zero or more rules, in the same order
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * they were found originally.  However, the total rules in
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * the bins may exceed the number in the original vector,
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * since rules that have a variable as their first key
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * character will generally fall into more than one bin.
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * That is, each bin contains all rules that either have that
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * first index value as their first key character, or have
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * a set containing the index value as their first character.
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int n = ruleVector.size();
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        index = new int[257]; // [sic]
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        List<TransliterationRule> v = new ArrayList<TransliterationRule>(2*n); // heuristic; adjust as needed
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Precompute the index values.  This saves a LOT of time.
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int[] indexValue = new int[n];
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int j=0; j<n; ++j) {
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            TransliterationRule r = ruleVector.get(j);
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            indexValue[j] = r.getIndexValue();
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int x=0; x<256; ++x) {
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index[x] = v.size();
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int j=0; j<n; ++j) {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (indexValue[j] >= 0) {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (indexValue[j] == x) {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        v.add(ruleVector.get(j));
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If the indexValue is < 0, then the first key character is
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // a set, and we must use the more time-consuming
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // matchesIndexValue check.  In practice this happens
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // rarely, so we seldom tread this code path.
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    TransliterationRule r = ruleVector.get(j);
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (r.matchesIndexValue(x)) {
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        v.add(r);
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        index[256] = v.size();
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Freeze things into an array.
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rules = new TransliterationRule[v.size()];
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        v.toArray(rules);
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder errors = null;
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Check for masking.  This is MUCH faster than our old check,
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * which was each rule against each following rule, since we
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * only have to check for masking within each bin now.  It's
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * 256*O(n2^2) instead of O(n1^2), where n1 is the total rule
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * count, and n2 is the per-bin rule count.  But n2<<n1, so
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * it's a big win.
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int x=0; x<256; ++x) {
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int j=index[x]; j<index[x+1]-1; ++j) {
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                TransliterationRule r1 = rules[j];
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int k=j+1; k<index[x+1]; ++k) {
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    TransliterationRule r2 = rules[k];
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (r1.masks(r2)) {
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (errors == null) {
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            errors = new StringBuilder();
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            errors.append("\n");
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        errors.append("Rule " + r1 + " masks " + r2);
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (errors != null) {
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException(errors.toString());
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Transliterate the given text with the given UTransPosition
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * indices.  Return TRUE if the transliteration should continue
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Note that FALSE is only ever returned if isIncremental is TRUE.
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param text the text to be transliterated
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pos the position indices, which will be updated
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param incremental if TRUE, assume new text may be inserted
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * at index.limit, and return FALSE if thre is a partial match.
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * indicating that transliteration should stop until more text
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * arrives.
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean transliterate(Replaceable text,
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 Transliterator.Position pos,
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 boolean incremental) {
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int indexByte = text.char32At(pos.start) & 0xFF;
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i=index[indexByte]; i<index[indexByte+1]; ++i) {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int m = rules[i].matchAndReplace(text, pos, incremental);
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            switch (m) {
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case UnicodeMatcher.U_MATCH:
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (Transliterator.DEBUG) {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.out.println((incremental ? "Rule.i: match ":"Rule: match ") +
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       rules[i].toRule(true) + " => " +
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       UtilityExtensions.formatInput(text, pos));
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case UnicodeMatcher.U_PARTIAL_MATCH:
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (Transliterator.DEBUG) {
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.out.println((incremental ? "Rule.i: partial match ":"Rule: partial match ") +
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       rules[i].toRule(true) + " => " +
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       UtilityExtensions.formatInput(text, pos));
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                default:
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (Transliterator.DEBUG) {
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        System.out.println("Rule: no match " + rules[i]);
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // No match or partial match from any rule
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        pos.start += UTF16.getCharCount(text.char32At(pos.start));
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Transliterator.DEBUG) {
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println((incremental ? "Rule.i: no match => ":"Rule: no match => ") +
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                               UtilityExtensions.formatInput(text, pos));
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Create rule strings that represents this rule set.
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String toRules(boolean escapeUnprintable) {
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i;
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = ruleVector.size();
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder ruleSource = new StringBuilder();
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (i=0; i<count; ++i) {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (i != 0) {
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ruleSource.append('\n');
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            TransliterationRule r = ruleVector.get(i);
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ruleSource.append(r.toRule(escapeUnprintable));
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ruleSource.toString();
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet currentFilter = new UnicodeSet(filter);
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet revisiting = new UnicodeSet();
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = ruleVector.size();
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i=0; i<count; ++i) {
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            TransliterationRule r = ruleVector.get(i);
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            r.addSourceTargetSet(currentFilter, sourceSet, targetSet, revisiting.clear());
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            currentFilter.addAll(revisiting);
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
257