// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2005-2016 International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport static com.ibm.icu.impl.CharacterIteration.DONE32;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport static com.ibm.icu.impl.CharacterIteration.next32;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport static com.ibm.icu.impl.CharacterIteration.nextTrail32;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport static com.ibm.icu.impl.CharacterIteration.previous32;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.ByteArrayOutputStream;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.InputStream;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.OutputStream;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.concurrent.ConcurrentHashMap;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Assert;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.CharTrie;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.CharacterIteration;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.ICUBinary;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.ICUDebug;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript;
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Rule Based Break Iterator.
 * This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
 *
 * @stable ICU 2.0
 */
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class RuleBasedBreakIterator extends BreakIterator {
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //=======================================================================
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Constructors & Factories
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //=======================================================================
4487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
/**
 * Private no-argument constructor used by the factory methods and by the
 * public constructor. It registers the unhandled-text break engine under
 * key -1, clears the dictionary character count and marks the rule status
 * index as valid. It does not assign {@code fRData}; each caller in this
 * class does that right after construction.
 */
private RuleBasedBreakIterator() {
    fBreakEngines.put(-1, fUnhandledBreakEngine);
    fDictionaryCharCount  = 0;
    fLastStatusIndexValid = true;
}
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Create a break iterator from a precompiled set of break rules.
5687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creating a break iterator from the binary rules is much faster than
5887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * creating one from source rules.
5987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Binary break iterator rules are not guaranteed to be compatible between
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * different versions of ICU.
6387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param is an input stream supplying the compiled binary rules.
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IOException if there is an error while reading the rules from the InputStream.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see    #compileRules(String, OutputStream)
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.8
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
71aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert        This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is));
7287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        return This;
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Create a break iterator from a precompiled set of break rules.
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creating a break iterator from the binary rules is much faster than
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * creating one from source rules.
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Binary break iterator rules are not guaranteed to be compatible between
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * different versions of ICU.
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param bytes a buffer supplying the compiled binary rules.
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IOException if there is an error while reading the rules from the buffer.
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see    #compileRules(String, OutputStream)
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        This.fRData = RBBIDataWrapper.get(bytes);
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return This;
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
 *
 * The rules are compiled into their binary form in memory and the rule data
 * is then loaded from that binary form.
 *
 * @param rules The break rules to be used.
 * @throws RuntimeException if loading the freshly compiled rules raises an
 *         IOException; the IOException is attached as the cause.
 * @stable ICU 2.2
 */
public RuleBasedBreakIterator(String rules)  {
    this();
    try {
        ByteArrayOutputStream ruleOS = new ByteArrayOutputStream();
        compileRules(rules, ruleOS);
        fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray()));
    } catch (IOException e) {
        ///CLOVER:OFF
        // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler,
        //  causing bogus compiled rules to be produced, but with no compile error raised.
        // Chain the original exception so its stack trace is not lost.
        throw new RuntimeException(
                "RuleBasedBreakIterator rule compilation internal error: " + e.getMessage(), e);
        ///CLOVER:ON
    }
}
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //=======================================================================
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Boilerplate
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //=======================================================================
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Clones this iterator.
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A newly-constructed RuleBasedBreakIterator with the same
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * behavior as this one.
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Object clone()
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedBreakIterator result = (RuleBasedBreakIterator)super.clone();
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText != null) {
13587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            result.fText = (CharacterIterator)(fText.clone());
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns true if both BreakIterators are of the same class, have the same
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * rules, and iterate over the same text.
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object that) {
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (that == null) {
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (this == that) {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fRData != other.fRData && (fRData == null || other.fRData == null)) {
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (fRData != null && other.fRData != null &&
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) {
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fText == null && other.fText == null) {
16387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                return true;
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fText == null || other.fText == null) {
16687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                return false;
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return fText.equals(other.fText);
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        catch(ClassCastException e) {
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     }
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the description (rules) used to create this iterator.
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (In ICU4C, the same function is RuleBasedBreakIterator::getRules())
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String toString() {
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String retStr = "";
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fRData != null) {
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            retStr =  fRData.fRuleSource;
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return retStr;
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compute a hashcode for this BreakIterator
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return A hash code
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode()
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
19787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        return fRData.fRuleSource.hashCode();
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int  START_STATE = 1;     // The state number of the starting state
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int  STOP_STATE  = 0;     // The state-transition value indicating "stop"
20387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // RBBIRunMode - the state machine runs an extra iteration at the beginning and end
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //               of user text.  A variable with this enum type keeps track of where we
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //               are.  The state machine only fetches user text input while in RUN mode.
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int  RBBI_START  = 0;
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int  RBBI_RUN    = 1;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int  RBBI_END    = 2;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The character iterator through which this BreakIterator accesses the text.
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CharacterIterator   fText = new java.text.StringCharacterIterator("");
21587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The rule data for this BreakIterator instance. Package private.
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    RBBIDataWrapper             fRData;
22087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
22287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * Index of the Rule {tag} values for the most recent match.
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int                 fLastRuleStatusIndex;
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Rule tag value valid flag.
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Some iterator operations don't intrinsically set the correct tag value.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This flag lets us lazily compute the value if we are ever asked for it.
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean             fLastStatusIndexValid;
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Counter for the number of characters encountered with the "dictionary"
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   flag set.  Normal RBBI iterators don't use it, although the code
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   for updating it is live.  Dictionary Based break iterators (a subclass
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   of us) access this field directly.
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int fDictionaryCharCount;
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ICU debug argument name for RBBI
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final String RBBI_DEBUG_ARG = "rbbi";
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Debugging flag.  Trace operation of state machine when true.
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean TRACE = ICUDebug.enabled(RBBI_DEBUG_ARG)
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0;
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
25487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * What kind of break iterator this is. Set to KIND_LINE by default,
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * since this produces sensible output.
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int fBreakType = KIND_LINE;
25887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The "default" break engine - just skips over ranges of dictionary words,
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * producing no breaks. Should only be used if characters need to be handled
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * by a dictionary but we have no dictionary implementation for them.
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final UnhandledBreakEngine fUnhandledBreakEngine = new UnhandledBreakEngine();
26587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * when a range of characters is divided up using the dictionary, the break
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * positions that are discovered are stored here, preventing us from having
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to use either the dictionary or the state table again until the iterator
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * leaves this range of text
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int[] fCachedBreakPositions;
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * if fCachedBreakPositions is not null, this indicates which item in the
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * cache the current iteration position refers to
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int fPositionInCache;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
28087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
28187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    private final ConcurrentHashMap<Integer, LanguageBreakEngine> fBreakEngines =
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new ConcurrentHashMap<Integer, LanguageBreakEngine>();
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Dumps caches and performs other actions associated with a complete change
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in text or iteration position.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void reset() {
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fCachedBreakPositions = null;
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // fNumCachedBreakPositions = 0;
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fDictionaryCharCount = 0;
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fPositionInCache = 0;
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Dump the contents of the state table and character classes for this break iterator.
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * For debugging only.
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
3012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    public void dump(java.io.PrintStream out) {
3022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        if (out == null) {
3032d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            out = System.out;
3042d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        }
3052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        this.fRData.dump(out);
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compile a set of source break rules into the binary state tables used
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * by the break iterator engine.  Creating a break iterator from precompiled
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * rules is much faster than creating one from source rules.
31287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Binary break rules are not guaranteed to be compatible between different
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * versions of ICU.
31587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
31687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param rules  The source form of the break rules
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param ruleBinary  An output stream to receive the compiled rules.
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IOException If there is an error writing the output.
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getInstanceFromCompiledRules(InputStream)
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.8
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static void compileRules(String rules, OutputStream ruleBinary) throws IOException {
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RBBIRuleBuilder.compileRules(rules, ruleBinary);
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
32687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //=======================================================================
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // BreakIterator overrides
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //=======================================================================
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the current iteration position to the beginning of the text.
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (i.e., the CharacterIterator's starting offset).
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The offset of the beginning of the text.
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int first() {
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fCachedBreakPositions = null;
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fDictionaryCharCount = 0;
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fPositionInCache = 0;
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastRuleStatusIndex  = 0;
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = true;
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText == null) {
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return BreakIterator.DONE;
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.first();
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return fText.getIndex();
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
35087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the current iteration position to the end of the text.
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (i.e., the CharacterIterator's ending offset).
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The text's past-the-end offset.
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int last() {
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fCachedBreakPositions = null;
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fDictionaryCharCount = 0;
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fPositionInCache = 0;
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText == null) {
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fLastRuleStatusIndex  = 0;
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fLastStatusIndexValid = true;
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return BreakIterator.DONE;
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // t.last() returns the offset of the last character,
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // rather than the past-the-end offset
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // so a loop like for(p=it.last(); p!=DONE; p=it.previous()) ...
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // will work correctly.
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = false;
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int pos = fText.getEndIndex();
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(pos);
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return pos;
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
37887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Advances the iterator either forward or backward the specified number of steps.
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Negative values move backward, and positive values move forward.  This is
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * equivalent to repeatedly calling next() or previous().
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param n The number of steps to move.  The sign indicates the direction
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (negative is backwards, and positive is forwards).
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character offset of the boundary position n boundaries away from
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the current one.
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int next(int n) {
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = current();
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (n > 0) {
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = next();
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --n;
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (n < 0) {
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = previous();
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++n;
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
40287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Advances the iterator to the next boundary position.
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the first boundary after this one.
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int next() {
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we have cached break positions and we're still in the range
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // covered by them, just move one step forward in the cache
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fCachedBreakPositions != null) {
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fPositionInCache < fCachedBreakPositions.length - 1) {
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++fPositionInCache;
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int pos = fCachedBreakPositions[fPositionInCache];
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fText.setIndex(pos);
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return pos;
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else {
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reset();
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int startPos = current();
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fDictionaryCharCount = 0;
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = handleNext(fRData.fFTable);
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fDictionaryCharCount > 0) {
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = checkDictionary(startPos, result, false);
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert      *  checkDictionary      This function handles all processing of characters in
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert      *                       the "dictionary" set. It will determine the appropriate
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert      *                       course of action, and possibly set up a cache in the
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert      *                       process.
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert      */
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int checkDictionary(int startPos, int endPos, boolean reverse) {
44087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Reset the old break cache first.
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
44487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        // note: code segment below assumes that dictionary chars are in the
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // startPos-endPos range
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // value returned should be next character in sequence
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((endPos - startPos) <= 1) {
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (reverse ? startPos : endPos);
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Starting from the starting point, scan towards the proposed result,
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // looking for the first dictionary character (which may be the one
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we're on, if we're starting in the middle of a range).
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(reverse ? endPos : startPos);
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (reverse) {
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharacterIteration.previous32(fText);
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  rangeStart = startPos;
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  rangeEnd = endPos;
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int    category;
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int    current;
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        DictionaryBreakEngine.DequeI breaks = new DictionaryBreakEngine.DequeI();
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int     foundBreakCount = 0;
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int     c = CharacterIteration.current32(fText);
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        category = (short)fRData.fTrie.getCodePointValue(c);
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Is the character we're starting on a dictionary character? If so, we
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // need to back up to include the entire run; otherwise the results of
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the break algorithm will differ depending on where we start. Since
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the result is cached and there is typically a non-dictionary break
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // within a small number of words, there should be little performance impact.
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((category & 0x4000) != 0) {
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (reverse) {
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    CharacterIteration.next32(fText);
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = CharacterIteration.current32(fText);
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    category = (short)fRData.fTrie.getCodePointValue(c);
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } while (c != CharacterIteration.DONE32 && ((category & 0x4000)) != 0);
48187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Back up to the last dictionary character
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                rangeEnd = fText.getIndex();
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (c == CharacterIteration.DONE32) {
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // c = fText->last32();
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //   TODO:  why was this if needed?
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = CharacterIteration.previous32(fText);
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                else {
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = CharacterIteration.previous32(fText);
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else {
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                do {
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = CharacterIteration.previous32(fText);
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    category = (short)fRData.fTrie.getCodePointValue(c);
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (c != CharacterIteration.DONE32 && ((category & 0x4000) != 0));
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Back up to the last dictionary character
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (c == CharacterIteration.DONE32) {
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // c = fText->first32();
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = CharacterIteration.current32(fText);
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                else {
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    CharacterIteration.next32(fText);
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = CharacterIteration.current32(fText);
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                rangeStart = fText.getIndex();
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            category = (short)fRData.fTrie.getCodePointValue(c);
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
51387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Loop through the text, looking for ranges of dictionary characters.
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // For each span, find the appropriate break engine, and ask it to find
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // any breaks within the span.
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: we always do this in the forward direction, so that the break
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // cache is built in the right order.
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (reverse) {
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fText.setIndex(rangeStart);
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c = CharacterIteration.current32(fText);
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            category = (short)fRData.fTrie.getCodePointValue(c);
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        LanguageBreakEngine lbe = null;
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(true) {
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while((current = fText.getIndex()) < rangeEnd && (category & 0x4000) == 0) {
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                CharacterIteration.next32(fText);
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = CharacterIteration.current32(fText);
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                category = (short)fRData.fTrie.getCodePointValue(c);
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (current >= rangeEnd) {
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
53487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We now have a dictionary character. Get the appropriate language object
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // to deal with it.
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lbe = getLanguageBreakEngine(c);
53887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Ask the language object if there are any breaks. It will leave the text
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // pointer on the other side of its range, ready to search for the next one.
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (lbe != null) {
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int startingIdx = fText.getIndex();
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                foundBreakCount += lbe.findBreaks(fText, rangeStart, rangeEnd, false, fBreakType, breaks);
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert fText.getIndex() > startingIdx;
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
54687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Reload the loop variables for the next go-round
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c = CharacterIteration.current32(fText);
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            category = (short)fRData.fTrie.getCodePointValue(c);
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
55187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If we found breaks, build a new break cache. The first and last entries must
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // be the original starting and ending position.
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (foundBreakCount > 0) {
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (foundBreakCount != breaks.size()) {
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println("oops, foundBreakCount != breaks.size().  LBE = " + lbe.getClass());
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert foundBreakCount == breaks.size();
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (startPos < breaks.peekLast()) {
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                breaks.offer(startPos);
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (endPos > breaks.peek()) {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                breaks.push(endPos);
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
56587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO: get rid of this array, use results from the deque directly
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fCachedBreakPositions = new int[breaks.size()];
56887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = 0;
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (breaks.size() > 0) {
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fCachedBreakPositions[i++] = breaks.pollLast();
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
57387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If there are breaks, then by definition, we are replacing the original
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // proposed break by one of the breaks we found. Use following() and
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // preceding() to do the work. They should never recurse in this case.
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (reverse) {
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return preceding(endPos);
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else {
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return following(startPos);
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If we get here, there were no language-based breaks. Set the text pointer
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // to the original proposed break.
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(reverse ? startPos : endPos);
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (reverse ? startPos : endPos);
58987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
59187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
59287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Moves the iterator backwards, to the last boundary preceding this one.
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the last boundary position preceding this one.
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previous() {
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result;
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int startPos;
60287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharacterIterator text = getText();
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = false;
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we have cached break positions and we're still in the range
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // covered by them, just move one step backward in the cache
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fCachedBreakPositions != null) {
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fPositionInCache > 0) {
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --fPositionInCache;
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // If we're at the beginning of the cache, need to reevaluate the
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // rule status
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (fPositionInCache <= 0) {
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fLastStatusIndexValid = false;
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int pos = fCachedBreakPositions[fPositionInCache];
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                text.setIndex(pos);
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return pos;
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reset();
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we're already sitting at the beginning of the text, return DONE
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        startPos = current();
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText == null || startPos == fText.getBeginIndex()) {
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fLastRuleStatusIndex  = 0;
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fLastStatusIndexValid = true;
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return BreakIterator.DONE;
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Rules with an exact reverse table are handled here.
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fRData.fSRTable != null || fRData.fSFTable != null) {
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result =  handlePrevious(fRData.fRTable);
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fDictionaryCharCount > 0) {
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = checkDictionary(result, startPos, true);
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // old rule syntax
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // set things up.  handlePrevious() will back us up to some valid
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // break position before the current position (we back our internal
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // iterator up one step to prevent handlePrevious() from returning
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the current position), but not necessarily the last one before
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // where we started
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int       start = current();
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        previous32(fText);
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int       lastResult    = handlePrevious(fRData.fRTable);
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (lastResult == BreakIterator.DONE) {
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastResult = fText.getBeginIndex();
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fText.setIndex(lastResult);
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result = lastResult;
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int      lastTag       = 0;
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean  breakTagValid = false;
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // iterate forward from the known break position until we pass our
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // starting point.  The last break position before the starting
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // point is our return value
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result         = next();
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (result == BreakIterator.DONE || result >= start) {
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastResult     = result;
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastTag        = fLastRuleStatusIndex;
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            breakTagValid  = true;
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // fLastBreakTag wants to have the value for section of text preceding
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the result position that we are to return (in lastResult.)  If
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the backwards rules overshot and the above loop had to do two or more
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // handleNext()s to move up to the desired return position, we will have a valid
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // tag value. But, if handlePrevious() took us to exactly the correct result position,
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we wont have a tag value for that position, which is only set by handleNext().
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Set the current iteration position to be the last break position
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // before where we started, and then return that value.
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(lastResult);
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastRuleStatusIndex  = lastTag;       // for use by getRuleStatus()
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = breakTagValid;
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return lastResult;
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the iterator to refer to the first boundary position following
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the specified position.
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset The position from which to begin searching for a break position.
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the first break after the current position.
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int following(int offset) {
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharacterIterator text = getText();
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we have no cached break positions, or if "offset" is outside the
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // range covered by the cache, then dump the cache and call our
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // inherited following() method.  This will call other methods in this
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // class that may refresh the cache.
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fCachedBreakPositions == null || offset < fCachedBreakPositions[0] ||
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offset >= fCachedBreakPositions[fCachedBreakPositions.length - 1]) {
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fCachedBreakPositions = null;
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return rulesFollowing(offset);
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // on the other hand, if "offset" is within the range covered by the
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // cache, then just search the cache for the first break position
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // after "offset"
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else {
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fPositionInCache = 0;
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (fPositionInCache < fCachedBreakPositions.length
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   && offset >= fCachedBreakPositions[fPositionInCache])
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++fPositionInCache;
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.setIndex(fCachedBreakPositions[fPositionInCache]);
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return text.getIndex();
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
72387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int rulesFollowing(int offset) {
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if the offset passed in is already past the end of the text,
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // just return DONE; if it's before the beginning, return the
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // text's starting offset
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastRuleStatusIndex  = 0;
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = true;
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText == null || offset >= fText.getEndIndex()) {
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            last();
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return next();
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else if (offset < fText.getBeginIndex()) {
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return first();
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // otherwise, set our internal iteration position (temporarily)
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // to the position passed in.  If this is the _beginning_ position,
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // then we can just use next() to get our return value
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result = 0;
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fRData.fSRTable != null) {
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Safe Point Reverse rules exist.
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //   This allows us to use the optimum algorithm.
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fText.setIndex(offset);
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // move forward one codepoint to prepare for moving back to a
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // safe point.
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // this handles offset being between a supplementary character
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            next32(fText);
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // handlePrevious will move most of the time to < 1 boundary away
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            handlePrevious(fRData.fSRTable);
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = next();
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (result <= offset) {
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = next();
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fRData.fSFTable != null) {
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // No Safe point reverse table, but there is a safe pt forward table.
76287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            //
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fText.setIndex(offset);
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            previous32(fText);
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // handle next will give result >= offset
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            handleNext(fRData.fSFTable);
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // previous will give result 0 or 1 boundary away from offset,
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // most of the time
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // we have to
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oldresult = previous();
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (oldresult > offset) {
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = previous();
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (result <= offset) {
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return oldresult;
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                oldresult = result;
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = next();
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (result <= offset) {
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return next();
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // otherwise, we have to sync up first.  Use handlePrevious() to back
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // us up to a known break position before the specified position (if
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we can determine that the specified position is a break position,
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we don't back up at all).  This may or may not be the last break
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // position at or before our starting position.  Advance forward
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // from here until we've passed the starting position.  The position
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we stop on will be the first break position after the specified one.
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // old rule syntax
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(offset);
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset == fText.getBeginIndex()) {
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return next();
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result = previous();
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (result != BreakIterator.DONE && result <= offset) {
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = next();
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the iterator to refer to the last boundary position before the
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * specified position.
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset The position to begin searching for a break from.
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The position of the last boundary before the starting position.
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8122d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int preceding(int offset) {
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharacterIterator text = getText();
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we have no cached break positions, or "offset" is outside the
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // range covered by the cache, we can just call the inherited routine
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // (which will eventually call other routines in this class that may
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // refresh the cache)
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fCachedBreakPositions == null || offset <= fCachedBreakPositions[0] ||
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offset > fCachedBreakPositions[fCachedBreakPositions.length - 1]) {
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fCachedBreakPositions = null;
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return rulesPreceding(offset);
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // on the other hand, if "offset" is within the range covered by the cache,
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // then all we have to do is search the cache for the last break position
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // before "offset"
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else {
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fPositionInCache = 0;
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (fPositionInCache < fCachedBreakPositions.length
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   && offset > fCachedBreakPositions[fPositionInCache])
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++fPositionInCache;
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --fPositionInCache;
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.setIndex(fCachedBreakPositions[fPositionInCache]);
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return text.getIndex();
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
83987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int rulesPreceding(int offset) {
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if the offset passed in is already past the end of the text,
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // just return DONE; if it's before the beginning, return the
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // text's starting offset
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText == null || offset > fText.getEndIndex()) {
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // return BreakIterator::DONE;
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return last();
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else if (offset < fText.getBeginIndex()) {
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return first();
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we start by updating the current iteration position to the
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // position specified by the caller, we can just use previous()
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // to carry out this operation
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  result;
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fRData.fSFTable != null) {
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /// todo synwee
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // new rule syntax
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fText.setIndex(offset);
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // move backwards one codepoint to prepare for moving forwards to a
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // safe point.
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // this handles offset being between a supplementary character
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            previous32(fText);
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            handleNext(fRData.fSFTable);
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = previous();
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (result >= offset) {
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = previous();
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fRData.fSRTable != null) {
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // backup plan if forward safe table is not available
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fText.setIndex(offset);
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            next32(fText);
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // handle previous will give result <= offset
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            handlePrevious(fRData.fSRTable);
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // next will give result 0 or 1 boundary away from offset,
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // most of the time
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // we have to
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oldresult = next();
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (oldresult < offset) {
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = next();
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (result >= offset) {
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return oldresult;
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                oldresult = result;
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = previous();
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (result >= offset) {
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return previous();
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result;
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // old rule syntax
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(offset);
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return previous();
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9049e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell     * Throw IllegalArgumentException unless begin &lt;= offset &lt; end.
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected static final void checkOffset(int offset, CharacterIterator text) {
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("offset out of bounds");
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns true if the specified position is a boundary position.  As a side
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * effect, leaves the iterator pointing to the first boundary position at
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or after "offset".
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param offset the offset to check.
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return True if "offset" is a boundary position.
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isBoundary(int offset) {
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkOffset(offset, fText);
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the beginning index of the iterator is always a boundary position by definition
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset == fText.getBeginIndex()) {
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            first();       // For side effects on current position, tag values.
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset == fText.getEndIndex()) {
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            last();       // For side effects on current position, tag values.
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // otherwise, we can use following() on the position before the specified
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // one and return true if the position we get back is the one the user
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // specified
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // return following(offset - 1) == offset;
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO:  check whether it is safe to revert to the simpler offset-1 code
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //         The safe rules may take care of unpaired surrogates ok.
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(offset);
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        previous32(fText);
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  pos = fText.getIndex();
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean result = following(pos) == offset;
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the current iteration position.
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The current iteration position.
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int current() {
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (fText != null) ? fText.getIndex() : BreakIterator.DONE;
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void makeRuleStatusValid() {
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fLastStatusIndexValid == false) {
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //  No cached status is available.
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int curr = current();
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (curr == BreakIterator.DONE || curr == fText.getBeginIndex()) {
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  At start of text, or there is no text.  Status is always zero.
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fLastRuleStatusIndex = 0;
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fLastStatusIndexValid = true;
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  Not at start of text.  Find status the tedious way.
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int pa = fText.getIndex();
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                first();
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int pb = current();
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (fText.getIndex() < pa) {
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    pb = next();
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Assert.assrt(pa == pb);
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Assert.assrt(fLastStatusIndexValid == true);
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Assert.assrt(fLastRuleStatusIndex >= 0  &&  fLastRuleStatusIndex < fRData.fStatusTable.length);
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the status tag from the break rule that determined the most recently
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returned break position.  The values appear in the rule source
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * within brackets, {123}, for example.  For rules that do not specify a
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * status, a default value of 0 is returned.  If more than one rule applies,
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the numerically largest of the possible status values is returned.
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Of the standard types of ICU break iterators, only the word break
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iterator provides status values.  The values are defined in
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * class RuleBasedBreakIterator, and allow distinguishing between words
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that contain alphabetic letters, "words" that appear to be numbers,
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * punctuation and spaces, words containing ideographic characters, and
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * more.  Call <code>getRuleStatus</code> after obtaining a boundary
9979e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell     * position from <code>next()</code>, <code>previous()</code>, or
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * any other break iterator functions that returns a boundary position.
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the status from the break rule that determined the most recently
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returned break position.
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 3.0 (retain)
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int  getRuleStatus() {
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        makeRuleStatusValid();
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   Status records have this form:
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //           Count N         <--  fLastRuleStatusIndex points here.
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //           Status val 0
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //           Status val 1
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //              ...
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //           Status val N-1  <--  the value we need to return
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   The status values are sorted in ascending order.
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   This function returns the last (largest) of the array of status values.
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  idx = fLastRuleStatusIndex + fRData.fStatusTable[fLastRuleStatusIndex];
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int  tagVal = fRData.fStatusTable[idx];
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return tagVal;
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
102487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * Get the status (tag) values from the break rule(s) that determined the most
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * recently returned break position.  The values appear in the rule source
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * within brackets, {123}, for example.  The default status value for rules
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that do not explicitly provide one is zero.
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The status values used by the standard ICU break rules are defined
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * as public constants in class RuleBasedBreakIterator.
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the size  of the output array is insufficient to hold the data,
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  the output will be truncated to the available length.  No exception
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  will be thrown.
10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
103687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * @param fillInArray an array to be filled in with the status values.
103787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * @return          The number of rule status values from rules that determined
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  the most recent boundary returned by the break iterator.
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  In the event that the array is too small, the return value
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  is the total number of status values that were available,
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  not the reduced number that were actually returned.
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 3.0 (retain)
10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getRuleStatusVec(int[] fillInArray) {
10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        makeRuleStatusValid();
10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int numStatusVals = fRData.fStatusTable[fLastRuleStatusIndex];
104987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        if (fillInArray != null) {
10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int numToCopy = Math.min(numStatusVals, fillInArray.length);
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i=0; i<numToCopy; i++) {
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fillInArray[i] = fRData.fStatusTable[fLastRuleStatusIndex + i + 1];
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return numStatusVals;
10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return a CharacterIterator over the text being analyzed.  This version
10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of this method returns the actual CharacterIterator we're using internally.
10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Changing the state of this iterator can have undefined consequences.  If
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * you need to change it, clone it first.
10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return An iterator over the text being analyzed.
10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CharacterIterator getText() {
10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return fText;
10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the iterator to analyze a new piece of text.  This function resets
10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the current iteration position to the beginning of the text.
10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param newText An iterator over the text to analyze.
10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setText(CharacterIterator newText) {
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText = newText;
10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // first() resets the caches
10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.first();
10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * package private
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void setBreakType(int type) {
10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBreakType = type;
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * package private
10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getBreakType() {
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return fBreakType;
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Control debug, trace and dump options.
11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final String fDebugEnv = ICUDebug.enabled(RBBI_DEBUG_ARG) ?
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        ICUDebug.value(RBBI_DEBUG_ARG) : null;
110487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
110587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private LanguageBreakEngine getLanguageBreakEngine(int c) {
11077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We have a dictionary character.
11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Does an already instantiated break engine handle it?
11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (LanguageBreakEngine candidate : fBreakEngines.values()) {
11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (candidate.handles(c, fBreakType)) {
11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return candidate;
11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we don't have an existing engine, build one.
11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (script == UScript.KATAKANA || script == UScript.HIRAGANA) {
11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Katakana, Hiragana and Han are handled by the same dictionary engine.
11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fold them together for mapping from script -> engine.
11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            script = UScript.HAN;
11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
112387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        LanguageBreakEngine eng = fBreakEngines.get(script);
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (eng != null && !eng.handles(c, fBreakType)) {
11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fUnhandledBreakEngine.handleChar(c, getBreakType());
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            eng = fUnhandledBreakEngine;
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else  */  {
11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                switch (script) {
11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                case UScript.THAI:
11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    eng = new ThaiBreakEngine();
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                case UScript.LAO:
11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    eng = new LaoBreakEngine();
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                case UScript.MYANMAR:
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    eng = new BurmeseBreakEngine();
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                case UScript.KHMER:
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    eng = new KhmerBreakEngine();
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                case UScript.HAN:
11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (getBreakType() == KIND_WORD) {
11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        eng = new CjkBreakEngine(false);
11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    else {
11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        fUnhandledBreakEngine.handleChar(c, getBreakType());
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        eng = fUnhandledBreakEngine;
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                case UScript.HANGUL:
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (getBreakType() == KIND_WORD) {
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        eng = new CjkBreakEngine(true);
11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        fUnhandledBreakEngine.handleChar(c, getBreakType());
11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        eng = fUnhandledBreakEngine;
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                default:
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fUnhandledBreakEngine.handleChar(c, getBreakType());
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    eng = fUnhandledBreakEngine;
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } catch (IOException e) {
11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                eng = null;
11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (eng != null && eng != fUnhandledBreakEngine) {
11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            LanguageBreakEngine existingEngine = fBreakEngines.putIfAbsent(script, eng);
11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (existingEngine != null) {
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // There was a race & another thread was first to register an engine for this script.
11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Use theirs and discard the one we just created.
11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                eng = existingEngine;
11777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // assert eng.handles(c, fBreakType);
11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return eng;
11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
118387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    private static final int kMaxLookaheads = 8;
118487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    private static class LookAheadResults {
118587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        int      fUsedSlotLimit;
118687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        int[]    fPositions;
118787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        int[]    fKeys;
118887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
118987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        LookAheadResults() {
119087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fUsedSlotLimit= 0;
119187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fPositions = new int[kMaxLookaheads];
119287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fKeys = new int[kMaxLookaheads];
119387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
119487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
119587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        int getPosition(int key) {
119687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            for (int i=0; i<fUsedSlotLimit; ++i) {
119787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (fKeys[i] == key) {
119887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    return fPositions[i];
119987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
120087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
120187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            assert(false);
120287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            return -1;
120387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
120487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
120587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        void setPosition(int key, int position) {
120687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            int i;
120787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            for (i=0; i<fUsedSlotLimit; ++i) {
120887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (fKeys[i] == key) {
120987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    fPositions[i] = position;
121087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    return;
121187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
121287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
121387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (i >= kMaxLookaheads) {
121487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                assert(false);
121587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                i = kMaxLookaheads - 1;
121687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
121787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fKeys[i] = key;
121887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fPositions[i] = position;
121987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            assert(fUsedSlotLimit == i);
122087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fUsedSlotLimit = i + 1;
122187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
122287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
122387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        void reset() {
122487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            fUsedSlotLimit = 0;
122587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
122687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    };
122787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    private LookAheadResults fLookAheadMatches = new LookAheadResults();
122887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The State Machine Engine for moving forward is here.
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This function is the heart of the RBBI run time engine.
123387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param stateTable
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the new iterator position
123687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     *
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A note on supplementary characters and the position of underlying
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java CharacterIterator:   Normally, a character iterator is positioned at
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the char most recently returned by next().  Within this function, when
12407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a supplementary char is being processed, the char iterator is left
12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * sitting on the trail surrogate, in the middle of the code point.
12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is different from everywhere else, where an iterator always
12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * points at the lead surrogate of a supplementary.
12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int handleNext(short stateTable[]) {
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (TRACE) {
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("Handle Next   pos      char  state category");
12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // No matter what, handleNext always correctly sets the break tag value.
12517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = true;
12527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastRuleStatusIndex  = 0;
12537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // caches for quicker access
12557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharacterIterator text = fText;
12567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharTrie trie = fRData.fTrie;
12577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Set up the starting char
12597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c               = text.current();
12607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
12617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c = nextTrail32(text, c);
12627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (c == DONE32) {
12637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return BreakIterator.DONE;
12647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int initialPosition = text.getIndex();
12677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int result          = initialPosition;
12687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Set the initial state for the state machine
12707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int state           = START_STATE;
127187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        int row             = fRData.getRowIndex(state);
12727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        short category      = 3;
12737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int flagsState      = fRData.getStateTableFlags(stateTable);
12747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mode            = RBBI_RUN;
12757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
12767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            category = 2;
12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mode     = RBBI_START;
12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (TRACE) {
127987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.print(RBBIDataWrapper.intToHexString(c, 10));
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
128487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        fLookAheadMatches.reset();
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // loop until we reach the end of the text or transition to state 0
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (state != STOP_STATE) {
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (c == DONE32) {
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Reached end of input string.
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (mode == RBBI_END) {
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // We have already run the loop one last time with the
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // character set to the pseudo {eof} value. Now it is time
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // to unconditionally bail out.
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Run the loop one last time with the fake end-of-input character category
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                mode = RBBI_END;
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                category = 1;
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else if (mode == RBBI_RUN) {
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Get the char category.  An incoming category of 1 or 2 means that
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //      we are preset for doing the beginning or end of input, and
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //      that we shouldn't get a category from an actual text input character.
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // look up the current character's character category, which tells us
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // which column in the state table to look at.
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                category = (short) trie.getCodePointValue(c);
131087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Check the dictionary bit in the character's category.
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    Counter is only used by dictionary based iterators (subclasses).
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    Chars that need to be handled by a dictionary have a flag bit set
13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    in their category values.
13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //
13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if ((category & 0x4000) != 0)  {
13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fDictionaryCharCount++;
13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  And off the dictionary flag bit.
13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    category &= ~0x4000;
13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (TRACE) {
132387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.out.print(RBBIDataWrapper.intToHexString(c, 10));
13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
132887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // Advance to the next character.
13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // If this is a beginning-of-input loop iteration, don't advance.
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    The next iteration will be processing the first real input character.
13312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                c = text.next();
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c = nextTrail32(text, c);
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else {
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                mode = RBBI_RUN;
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // look up a state transition in the state table
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
134287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            row   = fRData.getRowIndex(state);
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Match found, common case
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = text.getIndex();
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The iterator has been left in the middle of a surrogate pair.
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // We want the start of it.
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    result--;
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  Remember the break status (tag) values.
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fLastRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
135787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
135887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (completedRule > 0) {
135987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // Lookahead match is completed
136087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
136187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (lookaheadResult >= 0) {
136287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    fLastRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
136387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    text.setIndex(lookaheadResult);
136487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    return lookaheadResult;
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
136687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
136887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            int rule =  stateTable[row + RBBIDataWrapper.LOOKAHEAD];
136987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (rule != 0) {
137087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // At the position of a '/' in a look-ahead match. Record it.
137187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                int  pos = text.getIndex();
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The iterator has been left in the middle of a surrogate pair.
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // We want the beginning  of it.
137587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    pos--;
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
137787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                fLookAheadMatches.setPosition(rule, pos);
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
138087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }        // End of state machine main loop
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The state machine is done.  Check whether it found a match...
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If the iterator failed to advance in the match engine force it ahead by one.
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // This indicates a defect in the break rules, which should always match
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // at least one character.
138887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (result == initialPosition) {
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (TRACE) {
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println("Iterator did not move. Advancing by 1.");
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.setIndex(initialPosition);
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            next32(text);
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = text.getIndex();
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else {
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Leave the iterator at our result position.
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //   (we may have advanced beyond the last accepting position chasing after
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //    longer matches that never completed.)
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.setIndex(result);
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (TRACE) {
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("result = " + result);
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int handlePrevious(short stateTable[]) {
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (fText == null || stateTable == null) {
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
141387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int            state;
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int            category           = 0;
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int            mode;
141787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        int            row;
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int            c;
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int            result             = 0;
14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int            initialPosition    = 0;
142187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        fLookAheadMatches.reset();
142287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // handlePrevious() never gets the rule status.
14247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Flag the status as invalid; if the user ever asks for status, we will need
14257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // to back up, then re-find the break position using handleNext(), which does
14267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // get the status value.
14277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastStatusIndexValid = false;
14287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLastRuleStatusIndex  = 0;
142987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
14307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // set up the starting char
14317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        initialPosition = fText.getIndex();
14327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result          = initialPosition;
14337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c               = previous32(fText);
143487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
14357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Set up the initial state for the state machine
14367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        state = START_STATE;
14377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        row = fRData.getRowIndex(state);
14387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        category = 3;   // TODO:  obsolete?  from the old start/run mode scheme?
14397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mode     = RBBI_RUN;
14407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
14417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            category = 2;
14427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mode     = RBBI_START;
14437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
144487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
14457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (TRACE) {
14467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("Handle Prev   pos   char  state category ");
14477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
144887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
14497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // loop until we reach the beginning of the text or transition to state 0
14507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        mainLoop: for (;;) {
145287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (c == DONE32) {
145387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // Reached end of input string.
145487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
145587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    // Either this is the old (ICU 3.2 and earlier) format data which
145687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    // does not support explicit support for matching {eof}, or
145787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    // we have already done the {eof} iteration.  Now is the time
145887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    // to unconditionally bail out.
145987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    if (result == initialPosition) {
146087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        // Ran off start, no match found.
146187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        // Move one position (towards the start, since we are doing previous.)
146287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        fText.setIndex(initialPosition);
146387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        previous32(fText);
14647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
146587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    break mainLoop;
14667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
146787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                mode = RBBI_END;
146887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                category = 1;
146987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
147087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
147187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (mode == RBBI_RUN) {
147287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // look up the current character's category, which tells us
147387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // which column in the state table to look at.
147487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                //
147587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                category = (short) fRData.fTrie.getCodePointValue(c);
147687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
147787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // Check the dictionary bit in the character's category.
147887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                //    Counter is only used by dictionary based iterators (subclasses).
147987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                //    Chars that need to be handled by a dictionary have a flag bit set
148087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                //    in their category values.
14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //
148287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if ((category & 0x4000) != 0)  {
148387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    fDictionaryCharCount++;
148487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    //  And off the dictionary flag bit.
148587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    category &= ~0x4000;
14867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
148787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
148887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
148987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
149087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (TRACE) {
149187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                System.out.print("             " + fText.getIndex() + "   ");
149287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (0x20 <= c && c < 0x7f) {
149387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    System.out.print("  " + c + "  ");
149487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                } else {
149587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    System.out.print(" " + Integer.toHexString(c) + " ");
149687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
149787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                System.out.println(" " + state + "  " + category + " ");
149887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
149987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
150087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            // State Transition - move machine to its next state
150187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            //
150287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
150387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            row = fRData.getRowIndex(state);
150487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
150587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
150687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // Match found, common case, could have lookahead so we move
150787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // on to check it
150887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                result = fText.getIndex();
150987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
151087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
151187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
151287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
151387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (completedRule > 0) {
151487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // Lookahead match is completed.
151587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
151687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (lookaheadResult >= 0) {
151787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    result = lookaheadResult;
151887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    break mainLoop;
15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
152087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
152187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
152287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (rule != 0) {
152387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // At the position of a '/' in a look-ahead match. Record it.
152487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                int pos = fText.getIndex();
152587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                fLookAheadMatches.setPosition(rule, pos);
152687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
152787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (state == STOP_STATE) {
15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Normal loop exit is here
15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break mainLoop;
15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
153287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // then move iterator position backwards one character
15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //
15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (mode == RBBI_RUN) {
15367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c = previous32(fText);
15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
15387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (mode == RBBI_START) {
15397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    mode = RBBI_RUN;
15407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
154287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
154387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }   // End of the main loop.
154587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The state machine is done.  Check whether it found a match...
15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //
15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If the iterator failed to advance in the match engine, force it ahead by one.
15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   (This really indicates a defect in the break rules.  They should always match
15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    at least one character.)
15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (result == initialPosition) {
15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = fText.setIndex(initialPosition);
15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            previous32(fText);
15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = fText.getIndex();
15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
155687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fText.setIndex(result);
15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (TRACE) {
15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("Result = " + result);
15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
156187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1566