12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
608ae9f2909b2ec37f755dac4372553437e9d7cf6Paul Duffin * Copyright (C) 2005-2016 International Business Machines Corporation and
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved.
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text;
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.DONE32;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.next32;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.nextTrail32;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.previous32;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.ByteArrayOutputStream;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.InputStream;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.OutputStream;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.nio.ByteBuffer;
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.CharacterIterator;
24495cb271e305cfb399d463f32210a371198f0abfFredrik Roubertimport java.util.ArrayList;
25495cb271e305cfb399d463f32210a371198f0abfFredrik Roubertimport java.util.List;
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.CharacterIteration;
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.ICUBinary;
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.ICUDebug;
3005fa7802d0874812c234a29745586677ee5837eaFredrik Roubertimport android.icu.impl.Trie2;
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UProperty;
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UScript;
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * Rule Based Break Iterator
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
381c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert *
39836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic class RuleBasedBreakIterator extends BreakIterator {
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //=======================================================================
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Constructors & Factories
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //=======================================================================
451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    /**
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * private constructor
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private RuleBasedBreakIterator() {
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fDictionaryCharCount  = 0;
51495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        synchronized(gAllBreakEngines) {
52495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            fBreakEngines = new ArrayList<LanguageBreakEngine>(gAllBreakEngines);
53495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        }
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Create a break iterator from a precompiled set of break rules.
581c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Creating a break iterator from the binary rules is much faster than
601c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     * creating one from source rules.
611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Binary break iterator rules are not guaranteed to be compatible between
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * different versions of ICU.
651c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param is an input stream supplying the compiled binary rules.
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @throws IOException if there is an error while reading the rules from the InputStream.
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see    #compileRules(String, OutputStream)
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is));
731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        return This;
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Create a break iterator from a precompiled set of break rules.
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Creating a break iterator from the binary rules is much faster than
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * creating one from source rules.
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Binary break iterator rules are not guaranteed to be compatible between
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * different versions of ICU.
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param bytes a buffer supplying the compiled binary rules.
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @throws IOException if there is an error while reading the rules from the buffer.
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see    #compileRules(String, OutputStream)
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
90836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException {
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        This.fRData = RBBIDataWrapper.get(bytes);
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return This;
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param rules The break rules to be used.
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public RuleBasedBreakIterator(String rules)  {
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this();
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ByteArrayOutputStream ruleOS = new ByteArrayOutputStream();
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            compileRules(rules, ruleOS);
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray()));
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ///CLOVER:OFF
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler,
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  causing bogus compiled rules to be produced, but with no compile error raised.
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RuntimeException rte = new RuntimeException("RuleBasedBreakIterator rule compilation internal error: "
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    + e.getMessage());
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw rte;
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ///CLOVER:ON
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //=======================================================================
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Boilerplate
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //=======================================================================
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Clones this iterator.
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A newly-constructed RuleBasedBreakIterator with the same
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * behavior as this one.
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
129f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
13005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public Object clone()  {
13105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        RuleBasedBreakIterator result;
13205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        result = (RuleBasedBreakIterator)super.clone();
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fText != null) {
1341c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            result.fText = (CharacterIterator)(fText.clone());
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
136495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        synchronized (gAllBreakEngines)  {
137495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            result.fBreakEngines = new ArrayList<LanguageBreakEngine>(gAllBreakEngines);
138495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        }
139495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        result.fLookAheadMatches = new LookAheadResults();
14005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        result.fBreakCache = result.new BreakCache(fBreakCache);
14105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        result.fDictionaryCache = result.new DictionaryCache(fDictionaryCache);
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if both BreakIterators are of the same class, have the same
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * rules, and iterate over the same text.
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
150f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean equals(Object that) {
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (that == null) {
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (this == that) {
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRData != other.fRData && (fRData == null || other.fRData == null)) {
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (fRData != null && other.fRData != null &&
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) {
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fText == null && other.fText == null) {
1681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                return true;
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
17005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (fText == null || other.fText == null || !fText.equals(other.fText)) {
1711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                return false;
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
17305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return fPosition == other.fPosition;
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        catch(ClassCastException e) {
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     }
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the description (rules) used to create this iterator.
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (In ICU4C, the same function is RuleBasedBreakIterator::getRules())
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String toString() {
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String retStr = "";
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fRData != null) {
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            retStr =  fRData.fRuleSource;
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return retStr;
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compute a hashcode for this BreakIterator
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A hash code
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
197f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int hashCode()
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
2001c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        return fRData.fRuleSource.hashCode();
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int  START_STATE = 1;     // The state number of the starting state
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int  STOP_STATE  = 0;     // The state-transition value indicating "stop"
2061c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // RBBIRunMode - the state machine runs an extra iteration at the beginning and end
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               of user text.  A variable with this enum type keeps track of where we
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               are.  The state machine only fetches user text input while in RUN mode.
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int  RBBI_START  = 0;
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int  RBBI_RUN    = 1;
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int  RBBI_END    = 2;
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The character iterator through which this BreakIterator accesses the text.
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private CharacterIterator   fText = new java.text.StringCharacterIterator("");
2181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The rule data for this BreakIterator instance. Package private.
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    RBBIDataWrapper             fRData;
2231c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
    /**
     *  The iteration state - current position, rule status for the current position,
     *                        and whether the iterator ran off the end, yielding UBRK_DONE.
     *                        Current position is pinned to be 0 <= position <= text.length.
     *                        Current position is always set to a boundary.
     *
     *  The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
     *  Never has the value UBRK_DONE (-1).
     */
23305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int                fPosition;
23405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
23505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
2361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     * Index of the Rule {tag} values for the most recent match.
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
23805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int                fRuleStatusIndex;
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
24005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
24105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
24305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean            fDone;
24405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
24505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
24605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *   Cache of previously determined boundary positions.
24705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
24805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private BreakCache         fBreakCache = new BreakCache();
24905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Counter for the number of characters encountered with the "dictionary"
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   flag set.  Normal RBBI iterators don't use it, although the code
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   for updating it is live.  Dictionary Based break iterators (a subclass
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   of us) access this field directly.
256836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int fDictionaryCharCount;
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
26005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private DictionaryCache     fDictionaryCache = new DictionaryCache();
26105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ICU debug argument name for RBBI
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final String RBBI_DEBUG_ARG = "rbbi";
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Debugging flag.  Trace operation of state machine when true.
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final boolean TRACE = ICUDebug.enabled(RBBI_DEBUG_ARG)
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0;
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
27405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * What kind of break iterator this is.
27505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Defaulting BreakType to word gives reasonable dictionary behavior for
27605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Break Iterators that are built from rules.
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
27805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int fBreakType = KIND_WORD;
2791c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The "default" break engine - just skips over ranges of dictionary words,
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * producing no breaks. Should only be used if characters need to be handled
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * by a dictionary but we have no dictionary implementation for them.
284495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     *
285495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * Only one instance; shared by all break iterators.
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
287495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    private static final UnhandledBreakEngine gUnhandledBreakEngine;
288495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
289495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    /**
290495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * List of all known break engines, common for all break iterators.
291495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * Lazily updated as break engines are needed, because instantiation of
292495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * break engines is expensive.
293495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     *
294495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * Because gAllBreakEngines can be referenced concurrently from different
295495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * BreakIterator instances, all access is synchronized.
296495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     */
297495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    private static final List<LanguageBreakEngine> gAllBreakEngines;
298495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
299495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    static {
300495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        gUnhandledBreakEngine = new UnhandledBreakEngine();
301495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        gAllBreakEngines = new ArrayList<LanguageBreakEngine>();
302495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        gAllBreakEngines.add(gUnhandledBreakEngine);
303495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    }
3041c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
306495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * List of all known break engines. Similar to gAllBreakEngines, but local to a
307495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * break iterator, allowing it to be used without synchronization.
308495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     */
309495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    private List<LanguageBreakEngine> fBreakEngines;
310495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Dump the contents of the state table and character classes for this break iterator.
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For debugging only.
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
315836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
318f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    public void dump(java.io.PrintStream out) {
319f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        if (out == null) {
320f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            out = System.out;
321f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        }
322f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        this.fRData.dump(out);
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compile a set of source break rules into the binary state tables used
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * by the break iterator engine.  Creating a break iterator from precompiled
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * rules is much faster than creating one from source rules.
3291c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Binary break rules are not guaranteed to be compatible between different
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * versions of ICU.
3321c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
3331c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param rules  The source form of the break rules
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ruleBinary  An output stream to receive the compiled rules.
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @throws IOException If there is an error writing the output.
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #getInstanceFromCompiledRules(InputStream)
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void compileRules(String rules, OutputStream ruleBinary) throws IOException {
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBIRuleBuilder.compileRules(rules, ruleBinary);
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3421c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //=======================================================================
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // BreakIterator overrides
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //=======================================================================
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets the current iteration position to the beginning of the text.
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (i.e., the CharacterIterator's starting offset).
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The offset of the beginning of the text.
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
352f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int first() {
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fText == null) {
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return BreakIterator.DONE;
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fText.first();
35805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int start =  fText.getIndex();
35905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (!fBreakCache.seek(start)) {
36005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBreakCache.populateNear(start);
36105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
36205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBreakCache.current();
36305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(fPosition == start);
36405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fPosition;
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3661c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets the current iteration position to the end of the text.
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (i.e., the CharacterIterator's ending offset).
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The text's past-the-end offset.
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
372f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int last() {
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fText == null) {
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return BreakIterator.DONE;
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
37705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int endPos = fText.getEndIndex();
37805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        boolean endShouldBeBoundary = isBoundary(endPos);      // Has side effect of setting iterator position.
37905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(endShouldBeBoundary);
38005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fPosition != endPos) {
38105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            assert(fPosition == endPos);
38205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
38305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return endPos;
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3851c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Advances the iterator either forward or backward the specified number of steps.
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Negative values move backward, and positive values move forward.  This is
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * equivalent to repeatedly calling next() or previous().
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n The number of steps to move.  The sign indicates the direction
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (negative is backwards, and positive is forwards).
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The character offset of the boundary position n boundaries away from
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the current one.
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
395f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int next(int n) {
39705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int result = 0;
39805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (n > 0) {
39905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            for (; n > 0 && result != DONE; --n) {
40005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                result = next();
40105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
40205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if (n < 0) {
40305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            for (; n < 0 && result != DONE; ++n) {
40405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                result = previous();
40505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
40605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
40705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            result = current();
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4111c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Advances the iterator to the next boundary position.
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The position of the first boundary after this one.
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
416f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int next() {
41805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBreakCache.next();
41905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fDone ? DONE : fPosition;
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
42305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Moves the iterator backwards, to the boundary preceding the current one.
42405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @return The position of the boundary position immediately preceding the starting position.
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
426f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int previous() {
42805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBreakCache.previous();
42905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fDone ? DONE : fPosition;
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets the iterator to refer to the first boundary position following
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the specified position.
43505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @param startPos The position from which to begin searching for a break position.
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The position of the first break after the current position.
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
438f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
43905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public int following(int startPos) {
44005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // if the supplied position is before the beginning, return the
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // text's starting offset
44205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (startPos < fText.getBeginIndex()) {
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return first();
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
44605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Move requested offset to a code point start. It might be on a trail surrogate.
44705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Or it may be beyond the end of the text.
44805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        startPos = CISetIndex32(fText, startPos);
44905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBreakCache.following(startPos);
45005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fDone ? DONE : fPosition;
45105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets the iterator to refer to the last boundary position before the
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified position.
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param offset The position to begin searching for a break from.
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The position of the last boundary before the starting position.
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
460f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int preceding(int offset) {
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fText == null || offset > fText.getEndIndex()) {
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return last();
46405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if (offset < fText.getBeginIndex()) {
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return first();
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
46805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Move requested offset to a code point start. It might be on a trail surrogate.
46905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // int adjustedOffset = CISetIndex32(fText, offset);    // TODO: restore to match ICU4C behavior.
47005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int adjustedOffset = offset;
47105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBreakCache.preceding(adjustedOffset);
47205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fDone ? DONE : fPosition;
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
47605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
47808ae9f2909b2ec37f755dac4372553437e9d7cf6Paul Duffin     * Throw IllegalArgumentException unless begin &lt;= offset &lt; end.
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    protected static final void checkOffset(int offset, CharacterIterator text) {
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("offset out of bounds");
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if the specified position is a boundary position.  As a side
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * effect, leaves the iterator pointing to the first boundary position at
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or after "offset".
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param offset the offset to check.
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return True if "offset" is a boundary position.
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
494f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isBoundary(int offset) {
49605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // TODO: behavior difference with ICU4C, which considers out-of-range offsets
49705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //       to not be boundaries, and to not be errors.
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        checkOffset(offset, fText);
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
50005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Adjust offset to be on a code point boundary and not beyond the end of the text.
50105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Note that isBoundary() is always be false for offsets that are not on code point boundaries.
50205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // But we still need the side effect of leaving iteration at the following boundary.
50305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int adjustedOffset = CISetIndex32(fText, offset);
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
50505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        boolean result = false;
50605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fBreakCache.seek(adjustedOffset) || fBreakCache.populateNear(adjustedOffset)) {
50705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            result = (fBreakCache.current() == offset);
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
51005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (!result) {
51105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Not on a boundary. isBoundary() must leave iterator on the following boundary.
51205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // fBreakCache.seek(), above, left us on the preceding boundary, so advance one.
51305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            next();
51405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
51605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
52005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Returns the current iteration position.  Note that UBRK_DONE is never
52105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * returned from this function; if iteration has run to the end of a
52205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * string, current() will return the length of the string while
52305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * next() will return BreakIterator.DONE).
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The current iteration position.
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
526f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int current() {
52805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return (fText != null) ? fPosition : BreakIterator.DONE;
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return the status tag from the break rule that determined the most recently
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returned break position.  The values appear in the rule source
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * within brackets, {123}, for example.  For rules that do not specify a
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * status, a default value of 0 is returned.  If more than one rule applies,
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the numerically largest of the possible status values is returned.
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>
53905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Of the standard types of ICU break iterators, only the word and line break
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * iterator provides status values.  The values are defined in
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * class RuleBasedBreakIterator, and allow distinguishing between words
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * that contain alphabetic letters, "words" that appear to be numbers,
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * punctuation and spaces, words containing ideographic characters, and
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more.  Call <code>getRuleStatus</code> after obtaining a boundary
54508ae9f2909b2ec37f755dac4372553437e9d7cf6Paul Duffin     * position from <code>next()</code>, <code>previous()</code>, or
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * any other break iterator functions that returns a boundary position.
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the status from the break rule that determined the most recently
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returned break position.
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
552f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int  getRuleStatus() {
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   Status records have this form:
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //           Count N         <--  fLastRuleStatusIndex points here.
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //           Status val 0
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //           Status val 1
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //              ...
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //           Status val N-1  <--  the value we need to return
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   The status values are sorted in ascending order.
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   This function returns the last (largest) of the array of status values.
56205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int  idx = fRuleStatusIndex + fRData.fStatusTable[fRuleStatusIndex];
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int  tagVal = fRData.fStatusTable[idx];
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return tagVal;
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     * Get the status (tag) values from the break rule(s) that determined the most
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * recently returned break position.  The values appear in the rule source
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * within brackets, {123}, for example.  The default status value for rules
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * that do not explicitly provide one is zero.
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The status values used by the standard ICU break rules are defined
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * as public constants in class RuleBasedBreakIterator.
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If the size  of the output array is insufficient to hold the data,
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *  the output will be truncated to the available length.  No exception
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *  will be thrown.
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
5801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     * @param fillInArray an array to be filled in with the status values.
5811c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     * @return          The number of rule status values from rules that determined
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *                  the most recent boundary returned by the break iterator.
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *                  In the event that the array is too small, the return value
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *                  is the total number of status values that were available,
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *                  not the reduced number that were actually returned.
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
587f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getRuleStatusVec(int[] fillInArray) {
58905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int numStatusVals = fRData.fStatusTable[fRuleStatusIndex];
5901c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        if (fillInArray != null) {
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int numToCopy = Math.min(numStatusVals, fillInArray.length);
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i=0; i<numToCopy; i++) {
59305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fillInArray[i] = fRData.fStatusTable[fRuleStatusIndex + i + 1];
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return numStatusVals;
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
/**
 * Return a CharacterIterator over the text being analyzed.  This version
 * of this method returns the actual CharacterIterator we're using internally
 * (no copy is made), so changing the state of this iterator can have
 * undefined consequences.  If you need to change it, clone it first.
 * @return An iterator over the text being analyzed; this is whatever was
 *         last stored by setText(), which may be null.
 */
@Override
public CharacterIterator getText() {
    return fText;
}
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Set the iterator to analyze a new piece of text.  This function resets
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the current iteration position to the beginning of the text.
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param newText An iterator over the text to analyze.
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
616f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void setText(CharacterIterator newText) {
61805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (newText != null) {
61905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBreakCache.reset(newText.getBeginIndex(), 0);
62005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
62105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBreakCache.reset();
62205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
62305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fDictionaryCache.reset();
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fText = newText;
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        this.first();
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
/**
 * Stores the break type for this iterator in fBreakType, from where
 * getBreakType() reads it back.
 * Package private.
 * @param type the break type value to store.
 */
void setBreakType(int type) {
    fBreakType = type;
}
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
/**
 * Returns the break type held in fBreakType, as last stored by
 * setBreakType().
 * Package private.
 * @return the stored break type value.
 */
int getBreakType() {
    return fBreakType;
}
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
/**
 * Control debug, trace and dump options.
 * Holds the value ICUDebug reports for RBBI_DEBUG_ARG when that option is
 * enabled, and null otherwise.
 * @hide draft / provisional / internal are hidden on Android
 */
static final String fDebugEnv = ICUDebug.enabled(RBBI_DEBUG_ARG) ?
                                    ICUDebug.value(RBBI_DEBUG_ARG) : null;
6481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
6491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private LanguageBreakEngine getLanguageBreakEngine(int c) {
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We have a dictionary character.
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Does an already instantiated break engine handle it?
654495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        for (LanguageBreakEngine candidate : fBreakEngines) {
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (candidate.handles(c, fBreakType)) {
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return candidate;
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
660495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        synchronized (gAllBreakEngines) {
661495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            // This break iterator's list of break engines didn't handle the character.
662495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            // Check the global list, another break iterator may have instantiated the
663495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            // desired engine.
664495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            for (LanguageBreakEngine candidate : gAllBreakEngines) {
665495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                if (candidate.handles(c, fBreakType)) {
666495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                    fBreakEngines.add(candidate);
667495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                    return candidate;
668495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                }
669495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            }
670495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
671495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            // The global list doesn't have an existing engine, build one.
672495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
673495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            if (script == UScript.KATAKANA || script == UScript.HIRAGANA) {
674495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                // Katakana, Hiragana and Han are handled by the same dictionary engine.
675495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                // Fold them together for mapping from script -> engine.
676495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                script = UScript.HAN;
677495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            }
6781c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
679495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            LanguageBreakEngine eng;
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            try {
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                switch (script) {
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UScript.THAI:
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    eng = new ThaiBreakEngine();
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UScript.LAO:
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    eng = new LaoBreakEngine();
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UScript.MYANMAR:
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    eng = new BurmeseBreakEngine();
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UScript.KHMER:
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    eng = new KhmerBreakEngine();
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UScript.HAN:
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (getBreakType() == KIND_WORD) {
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        eng = new CjkBreakEngine(false);
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    else {
699495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                        gUnhandledBreakEngine.handleChar(c, getBreakType());
700495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                        eng = gUnhandledBreakEngine;
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                case UScript.HANGUL:
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (getBreakType() == KIND_WORD) {
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        eng = new CjkBreakEngine(true);
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
707495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                        gUnhandledBreakEngine.handleChar(c, getBreakType());
708495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                        eng = gUnhandledBreakEngine;
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                default:
712495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                    gUnhandledBreakEngine.handleChar(c, getBreakType());
713495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                    eng = gUnhandledBreakEngine;
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } catch (IOException e) {
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                eng = null;
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
720495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            if (eng != null && eng != gUnhandledBreakEngine) {
721495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                gAllBreakEngines.add(eng);
722495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                fBreakEngines.add(eng);
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
724495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert            return eng;
725495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        }   // end synchronized(gAllBreakEngines)
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    private static final int kMaxLookaheads = 8;
7291c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    private static class LookAheadResults {
7301c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        int      fUsedSlotLimit;
7311c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        int[]    fPositions;
7321c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        int[]    fKeys;
7331c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
7341c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        LookAheadResults() {
7351c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fUsedSlotLimit= 0;
7361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fPositions = new int[kMaxLookaheads];
7371c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fKeys = new int[kMaxLookaheads];
7381c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        }
7391c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
7401c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        int getPosition(int key) {
7411c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            for (int i=0; i<fUsedSlotLimit; ++i) {
7421c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                if (fKeys[i] == key) {
7431c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    return fPositions[i];
7441c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                }
7451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
7461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            assert(false);
7471c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            return -1;
7481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        }
7491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
7501c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        void setPosition(int key, int position) {
7511c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            int i;
7521c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            for (i=0; i<fUsedSlotLimit; ++i) {
7531c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                if (fKeys[i] == key) {
7541c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    fPositions[i] = position;
7551c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    return;
7561c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                }
7571c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
7581c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (i >= kMaxLookaheads) {
7591c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                assert(false);
7601c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                i = kMaxLookaheads - 1;
7611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
7621c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fKeys[i] = key;
7631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fPositions[i] = position;
7641c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            assert(fUsedSlotLimit == i);
7651c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fUsedSlotLimit = i + 1;
7661c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        }
7671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
7681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        void reset() {
7691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fUsedSlotLimit = 0;
7701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        }
7711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    };
7721c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    private LookAheadResults fLookAheadMatches = new LookAheadResults();
7731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The State Machine Engine for moving forward is here.
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This function is the heart of the RBBI run time engine.
7781c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
77905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Input
78005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *    fPosition, the position in the text to begin from.
78105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Output
78205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *    fPosition:           the boundary following the starting position.
78305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *    fDictionaryCharCount the number of dictionary characters encountered.
78405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                         If > 0, the segment will be further subdivided
78505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *    fRuleStatusIndex     Info from the state table indicating which rules caused the boundary.
78605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the new iterator position
7881c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert     *
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A note on supplementary characters and the position of underlying
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Java CharacterIterator:   Normally, a character iterator is positioned at
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the char most recently returned by next().  Within this function, when
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a supplementary char is being processed, the char iterator is left
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * sitting on the trail surrogate, in the middle of the code point.
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is different from everywhere else, where an iterator always
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * points at the lead surrogate of a supplementary.
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
79705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int handleNext() {
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (TRACE) {
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("Handle Next   pos      char  state category");
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
80205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // handleNext always sets the break tag value.
80305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Set the default for it.
80405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fRuleStatusIndex  = 0;
80505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fDictionaryCharCount = 0;
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // caches for quicker access
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CharacterIterator text = fText;
80905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        Trie2 trie = fRData.fTrie;
81005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
81105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        short[] stateTable  = fRData.fFTable;
81205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int initialPosition = fPosition;
81305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        text.setIndex(initialPosition);
81405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int result          = initialPosition;
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set up the starting char
81705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int c = text.current();
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c = nextTrail32(text, c);
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == DONE32) {
82105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fDone = true;
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return BreakIterator.DONE;
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set the initial state for the state machine
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int state           = START_STATE;
8281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        int row             = fRData.getRowIndex(state);
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        short category      = 3;
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int flagsState      = fRData.getStateTableFlags(stateTable);
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int mode            = RBBI_RUN;
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            category = 2;
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            mode     = RBBI_START;
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (TRACE) {
8361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.print(RBBIDataWrapper.intToHexString(c, 10));
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8411c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        fLookAheadMatches.reset();
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // loop until we reach the end of the text or transition to state 0
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (state != STOP_STATE) {
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == DONE32) {
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Reached end of input string.
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (mode == RBBI_END) {
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We have already run the loop one last time with the
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // character set to the pseudo {eof} value. Now it is time
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // to unconditionally bail out.
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Run the loop one last time with the fake end-of-input character category
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                mode = RBBI_END;
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                category = 1;
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (mode == RBBI_RUN) {
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Get the char category.  An incoming category of 1 or 2 mens that
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //      we are preset for doing the beginning or end of input, and
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //      that we shouldn't get a category from an actual text input character.
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // look up the current character's character category, which tells us
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // which column in the state table to look at.
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //
86605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                category = (short) trie.get(c);
8671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Check the dictionary bit in the character's category.
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    Counter is only used by dictionary based iterators (subclasses).
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    Chars that need to be handled by a dictionary have a flag bit set
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    in their category values.
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((category & 0x4000) != 0)  {
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    fDictionaryCharCount++;
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //  And off the dictionary flag bit.
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    category &= ~0x4000;
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (TRACE) {
8801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    System.out.print(RBBIDataWrapper.intToHexString(c, 10));
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8851c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // Advance to the next character.
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If this is a beginning-of-input loop iteration, don't advance.
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    The next iteration will be processing the first real input character.
888f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                c = text.next();
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c = nextTrail32(text, c);
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                mode = RBBI_RUN;
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // look up a state transition in the state table
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
8991c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            row   = fRData.getRowIndex(state);
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Match found, common case
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result = text.getIndex();
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The iterator has been left in the middle of a surrogate pair.
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We want the start of it.
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result--;
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //  Remember the break status (tag) values.
91105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9141c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
9151c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (completedRule > 0) {
9161c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // Lookahead match is completed
9171c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
9181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                if (lookaheadResult >= 0) {
91905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
92005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    fPosition = lookaheadResult;
9211c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    return lookaheadResult;
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9231c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9251c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            int rule =  stateTable[row + RBBIDataWrapper.LOOKAHEAD];
9261c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (rule != 0) {
9271c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // At the position of a '/' in a look-ahead match. Record it.
9281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                int  pos = text.getIndex();
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The iterator has been left in the middle of a surrogate pair.
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We want the beginning  of it.
9321c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    pos--;
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9341c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                fLookAheadMatches.setPosition(rule, pos);
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9371c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }        // End of state machine main loop
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The state machine is done.  Check whether it found a match...
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If the iterator failed to advance in the match engine force it ahead by one.
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // This indicates a defect in the break rules, which should always match
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // at least one character.
9451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (result == initialPosition) {
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (TRACE) {
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.println("Iterator did not move. Advancing by 1.");
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            text.setIndex(initialPosition);
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            next32(text);
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result = text.getIndex();
95305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fRuleStatusIndex = 0;
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
95505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
95605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Leave the iterator at our result position.
95705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //   (we may have advanced beyond the last accepting position chasing after
95805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //    longer matches that never completed.)
95905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fPosition = result;
96005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (TRACE) {
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("result = " + result);
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
96705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
96805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
96905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * This locates a "Safe Position" from which the forward break rules
97005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * will operate correctly. A Safe Position is not necessarily a boundary itself.
97105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
97205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * The logic of this function is very similar to handleNext(), above.
97305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
97405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @param fromPosition the position in the input text to begin the iteration.
97505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @hide draft / provisional / internal are hidden on Android
97605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
97705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int handlePrevious(int fromPosition) {
97805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fText == null) {
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9811c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int            state;
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int            category           = 0;
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int            mode;
9851c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        int            row;
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int            c;
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int            result             = 0;
98805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int            initialPosition    = fromPosition;
9891c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        fLookAheadMatches.reset();
99005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        short[] stateTable = fRData.fSRTable;
99105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        CISetIndex32(fText, fromPosition);
99205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fromPosition == fText.getBeginIndex()) {
99305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return BreakIterator.DONE;
99405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
9951c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // set up the starting char
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        result          = initialPosition;
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        c               = previous32(fText);
9991c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Set up the initial state for the state machine
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        state = START_STATE;
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        row = fRData.getRowIndex(state);
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        category = 3;   // TODO:  obsolete?  from the old start/run mode scheme?
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        mode     = RBBI_RUN;
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            category = 2;
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            mode     = RBBI_START;
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10091c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (TRACE) {
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("Handle Prev   pos   char  state category ");
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10131c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // loop until we reach the beginning of the text or transition to state 0
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        mainLoop: for (;;) {
10171c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (c == DONE32) {
10181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // Reached end of input string.
101905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (mode == RBBI_END) {
102005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // We have already done the {eof} iteration.  Now is the time
10211c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    // to unconditionally bail out.
10221c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    break mainLoop;
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10241c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                mode = RBBI_END;
10251c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                category = 1;
10261c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
10271c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (mode == RBBI_RUN) {
10291c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // look up the current character's category, which tells us
10301c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // which column in the state table to look at.
10311c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                //
103205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                //  And off the dictionary flag bit. For reverse iteration it is not used.
103305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                category = (short) fRData.fTrie.get(c);
103405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                category &= ~0x4000;
10351c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
10361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10371c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (TRACE) {
10381c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                System.out.print("             " + fText.getIndex() + "   ");
10391c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                if (0x20 <= c && c < 0x7f) {
10401c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    System.out.print("  " + c + "  ");
10411c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                } else {
10421c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    System.out.print(" " + Integer.toHexString(c) + " ");
10431c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                }
10441c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                System.out.println(" " + state + "  " + category + " ");
10451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
10461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10471c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            // State Transition - move machine to its next state
10481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            //
10491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
10501c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            row = fRData.getRowIndex(state);
10511c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10521c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
10531c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // Match found, common case, could have lookahead so we move
10541c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // on to check it
10551c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                result = fText.getIndex();
10561c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
10571c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10581c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10591c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
10601c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (completedRule > 0) {
10611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // Lookahead match is completed.
10621c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
10631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                if (lookaheadResult >= 0) {
10641c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    result = lookaheadResult;
10651c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    break mainLoop;
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
10681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
10691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (rule != 0) {
10701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // At the position of a '/' in a look-ahead match. Record it.
10711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                int pos = fText.getIndex();
10721c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                fLookAheadMatches.setPosition(rule, pos);
10731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
10741c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (state == STOP_STATE) {
10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Normal loop exit is here
10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break mainLoop;
10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10791c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // then move iterator position backwards one character
10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (mode == RBBI_RUN) {
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = previous32(fText);
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (mode == RBBI_START) {
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    mode = RBBI_RUN;
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10891c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10901c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }   // End of the main loop.
10921c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The state machine is done.  Check whether it found a match...
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
109505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // If the iterator failed to move in the match engine, force it back by one code point.
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   (This really indicates a defect in the break rules.  They should always match
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //    at least one character.)
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (result == initialPosition) {
109905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            CISetIndex32(fText, initialPosition);
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            previous32(fText);
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result = fText.getIndex();
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11031c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (TRACE) {
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("Result = " + result);
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11071c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
111005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
111105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
111205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Set the index of a CharacterIterator.
111305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Pin the index to the valid range range of BeginIndex <= index <= EndIndex.
111405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * If the index points to a trail surrogate of a supplementary character, adjust it
111505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * to the start (lead surrogate) index.
111605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
111705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @param ci A CharacterIterator to set
111805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @param index the index to set
111905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @return the resulting index, possibly pinned or adjusted.
112005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
112105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private static int CISetIndex32(CharacterIterator ci, int index) {
112205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (index <= ci.getBeginIndex()) {
112305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            ci.first();
112405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if (index >= ci.getEndIndex()) {
112505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            ci.setIndex(ci.getEndIndex());
112605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if (Character.isLowSurrogate(ci.setIndex(index))) {
112705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (!Character.isHighSurrogate(ci.previous())) {
112805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                ci.next();
112905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
113005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
113105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return ci.getIndex();
113205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
113305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
113405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /* DictionaryCache  stores the boundaries obtained from a run of dictionary characters.
113505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 Dictionary boundaries are moved first to this cache, then from here
113605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 to the main BreakCache, where they may inter-leave with non-dictionary
113705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 boundaries. The public BreakIterator API always fetches directly
113805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 from the main BreakCache, not from here.
113905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
114005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 In common situations, the number of boundaries in a single dictionary run
114105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 should be quite small, it will be terminated by punctuation, spaces,
114205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 or any other non-dictionary characters. The main BreakCache may end
114305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 up with boundaries from multiple dictionary based runs.
114405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
114505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 The boundaries are stored in a simple ArrayList (vector), with the
114605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *                 assumption that they will be accessed sequentially.
114705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
114805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    class DictionaryCache  {
114905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
115005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         void reset() {
115105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fPositionInCache = -1;
115205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fStart = 0;
115305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fLimit = 0;
115405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fFirstRuleStatusIndex = 0;
115505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fOtherRuleStatusIndex = 0;
115605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fBreaks.removeAllElements();
115705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         };
115805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
115905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         boolean following(int fromPos) {
116005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             if (fromPos >= fLimit || fromPos < fStart) {
116105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fPositionInCache = -1;
116205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 return false;
116305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
116405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
116505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             // Sequential iteration, move from previous boundary to the following
116605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
116705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             int r = 0;
116805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) {
116905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 ++fPositionInCache;
117005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 if (fPositionInCache >= fBreaks.size()) {
117105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     fPositionInCache = -1;
117205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     return false;
117305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 }
117405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 r = fBreaks.elementAt(fPositionInCache);
117505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 assert(r > fromPos);
117605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fBoundary = r;
117705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fStatusIndex = fOtherRuleStatusIndex;
117805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 return true;
117905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
118005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
118105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             // Random indexing. Linear search for the boundary following the given position.
118205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
118305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
118405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 r= fBreaks.elementAt(fPositionInCache);
118505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 if (r > fromPos) {
118605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     fBoundary = r;
118705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     fStatusIndex = fOtherRuleStatusIndex;
118805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     return true;
118905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 }
119005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
119105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
119205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             // Internal error. fStart <= fromPos < fLimit, but no cached boundary.
119305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             assert(false);
119405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fPositionInCache = -1;
119505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             return false;
119605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         };
119705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
119805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         boolean preceding(int fromPos) {
119905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             if (fromPos <= fStart || fromPos > fLimit) {
120005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fPositionInCache = -1;
120105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 return false;
120205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
120305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
120405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             if (fromPos == fLimit) {
120505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fPositionInCache = fBreaks.size() - 1;
120605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 if (fPositionInCache >= 0) {
120705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     assert(fBreaks.elementAt(fPositionInCache) == fromPos);
120805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 }
120905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
121005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
121105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             int r;
121205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) {
121305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 --fPositionInCache;
121405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 r = fBreaks.elementAt(fPositionInCache);
121505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 assert(r < fromPos);
121605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fBoundary = r;
121705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fStatusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
121805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 return true;
121905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
122005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
122105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             if (fPositionInCache == 0) {
122205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 fPositionInCache = -1;
122305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 return false;
122405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
122505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
122605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
122705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 r = fBreaks.elementAt(fPositionInCache);
122805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 if (r < fromPos) {
122905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     fBoundary = r;
123005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     fStatusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
123105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                     return true;
123205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                 }
123305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             }
123405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             assert(false);
123505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             fPositionInCache = -1;
123605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert             return false;
123705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         };
123805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
123905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        /**
124005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * Populate the cache with the dictionary based boundaries within a region of text.
124105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * @param startPos  The start position of a range of text
124205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * @param endPos    The end position of a range of text
124305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * @param firstRuleStatus The rule status index that applies to the break at startPos
124405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * @param otherRuleStatus The rule status index that applies to boundaries other than startPos
124505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * @hide draft / provisional / internal are hidden on Android
124605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         */
124705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        void populateDictionary(int startPos, int endPos,
124805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                int firstRuleStatus, int otherRuleStatus) {
124905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if ((endPos - startPos) <= 1) {
125005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return;
125105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
125205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
125305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            reset();
125405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fFirstRuleStatusIndex = firstRuleStatus;
125505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fOtherRuleStatusIndex = otherRuleStatus;
125605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
125705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int rangeStart = startPos;
125805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int rangeEnd = endPos;
125905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
126005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int         category;
126105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int         current;
126205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int         foundBreakCount = 0;
126305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
126405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Loop through the text, looking for ranges of dictionary characters.
126505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // For each span, find the appropriate break engine, and ask it to find
126605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // any breaks within the span.
126705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
126805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fText.setIndex(rangeStart);
126905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int     c = CharacterIteration.current32(fText);
127005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            category = (short)fRData.fTrie.get(c);
127105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
127205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            while(true) {
127305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                while((current = fText.getIndex()) < rangeEnd && (category & 0x4000) == 0) {
127405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    c = CharacterIteration.next32(fText);    // pre-increment
127505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    category = (short)fRData.fTrie.get(c);
127605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
127705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (current >= rangeEnd) {
127805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    break;
127905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
128005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
128105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // We now have a dictionary character. Get the appropriate language object
128205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // to deal with it.
128305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                LanguageBreakEngine lbe = getLanguageBreakEngine(c);
128405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
128505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Ask the language object if there are any breaks. It will add them to the cache and
128605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // leave the text pointer on the other side of its range, ready to search for the next one.
128705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (lbe != null) {
128805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    foundBreakCount += lbe.findBreaks(fText, rangeStart, rangeEnd, fBreakType, fBreaks);
128905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
129005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
129105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Reload the loop variables for the next go-round
129205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                c = CharacterIteration.current32(fText);
129305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                category = (short)fRData.fTrie.get(c);
129405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
129505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
129605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // If we found breaks, ensure that the first and last entries are
129705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // the original starting and ending position. And initialize the
129805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // cache iteration position to the first entry.
129905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
130005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // System.out.printf("foundBreakCount = %d%n", foundBreakCount);
130105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (foundBreakCount > 0) {
130205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                assert(foundBreakCount == fBreaks.size());
130305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (startPos < fBreaks.elementAt(0)) {
130405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The dictionary did not place a boundary at the start of the segment of text.
130505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // Add one now. This should not commonly happen, but it would be easy for interactions
130605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // of the rules for dictionary segments and the break engine implementations to
130705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // inadvertently cause it. Cover it here, just in case.
130805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    fBreaks.offer(startPos);
130905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
131005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (endPos > fBreaks.peek()) {
131105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    fBreaks.push(endPos);
131205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
131305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fPositionInCache = 0;
131405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Note: Dictionary matching may extend beyond the original limit.
131505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fStart = fBreaks.elementAt(0);
131605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fLimit = fBreaks.peek();
131705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else {
131805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // there were no language-based breaks, even though the segment contained
131905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
132005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // for this range will fail, and the calling code will fall back to the rule based boundaries.
132105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
132205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
132305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        };
132405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
132505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
132605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        DictionaryCache() {
132705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fPositionInCache = -1;
132805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBreaks = new DictionaryBreakEngine.DequeI();
132905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
133005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
133105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        /**
133205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * copy constructor. Used by RuleBasedBreakIterator.clone().
133305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         *
133405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         * @param src the source object to be copied.
133505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert         */
133605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        DictionaryCache(DictionaryCache src)  {
133705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            try {
133805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fBreaks = (DictionaryBreakEngine.DequeI)src.fBreaks.clone();
133905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
134005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            catch (CloneNotSupportedException e) {
134105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                throw new RuntimeException(e);
134205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
134305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fPositionInCache      = src.fPositionInCache;
134405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fStart                = src.fStart;
134505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fLimit                = src.fLimit;
134605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fFirstRuleStatusIndex = src.fFirstRuleStatusIndex;
134705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fOtherRuleStatusIndex = src.fOtherRuleStatusIndex;
134805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBoundary             = src.fBoundary;
134905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fStatusIndex          = src.fStatusIndex;
135005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
135105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
135205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // The boundaries themselves, held in a container that is essentially a vector of raw ints.
135305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        DictionaryBreakEngine.DequeI fBreaks;
135405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fPositionInCache;       // Index in fBreaks of the last boundary returned by following()
135505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //                                      //    or preceding(). Optimizes sequential access.
135605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fStart;                 // Text position of the first boundary in the cache.
135705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fLimit;                 // Last boundary in the cache, which is also the limit of the
135805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //                                      //    text segment being handled by the dictionary.
135905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fFirstRuleStatusIndex;  // Rule status info for the first boundary.
136005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fOtherRuleStatusIndex;  // Rule status info for the 2nd through last boundaries.
136105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fBoundary;              // Current boundary. Set by preceding() and following().
136205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int             fStatusIndex;           // Current rule status index. Set by preceding() and following().
136305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
136405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
136505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
136605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
136705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
136805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert/*
136905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * class BreakCache
137005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert *
137105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Cache of break boundary positions and rule status values.
137205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Break iterator API functions, next(), previous(), etc., will use cached results
137305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * when possible, and otherwise cache new results as they are obtained.
137405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert *
137505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
137605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert *
137705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The cache is implemented as a single circular buffer.
137805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */
137905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
138005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert/*
138105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * size of the circular cache buffer.
138205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */
138305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
138405fa7802d0874812c234a29745586677ee5837eaFredrik Roubertclass BreakCache {
138505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
/** Creates a cache and puts it into its starting state via {@link #reset()}. */
BreakCache() {
    reset();
}
138905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
139005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void  reset(int pos, int ruleStatus) {
139105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStartBufIdx = 0;
139205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fEndBufIdx = 0;
139305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fTextIdx = pos;
139405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBufIdx = 0;
139505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBoundaries[0] = pos;
139605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStatuses[0] = (short)ruleStatus;
139705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
139805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
139905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void  reset() {reset(0, 0); };
140005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
140105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void  next() {
140205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fBufIdx == fEndBufIdx) {
140305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fDone = !populateFollowing();
140405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fPosition = fTextIdx;
140505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fRuleStatusIndex = fStatuses[fBufIdx];
140605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
140705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = modChunkSize(fBufIdx + 1);
140805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = fPosition = fBoundaries[fBufIdx];
140905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fRuleStatusIndex = fStatuses[fBufIdx];
141005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
141105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
141205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
141305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void  previous() {
141405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int initialBufIdx = fBufIdx;
141505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fBufIdx == fStartBufIdx) {
141605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // At start of cache. Prepend to it.
141705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            populatePreceding();
141805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
141905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Cache already holds the next boundary
142005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = modChunkSize(fBufIdx - 1);
142105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = fBoundaries[fBufIdx];
142205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
142305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fDone = (fBufIdx == initialBufIdx);
142405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fPosition = fTextIdx;
142505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fRuleStatusIndex = fStatuses[fBufIdx];
142605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return;
142705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
142805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
142905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // Move the iteration state to the position following the startPosition.
143005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // Input position must be pinned to the input length.
143105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void following(int startPos) {
143205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (startPos == fTextIdx || seek(startPos) || populateNear(startPos)) {
143305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // startPos is in the cache. Do a next() from that position.
143405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // TODO: an awkward set of interactions with bi->fDone
143505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            //       seek() does not clear it; it can't because of interactions with populateNear().
143605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
143705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
143805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fDone = false;
143905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            next();
144005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
144105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
144205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
144305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
144405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void  preceding(int startPos) {
144505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (startPos == fTextIdx || seek(startPos) || populateNear(startPos)) {
144605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (startPos == fTextIdx) {
144705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                previous();
144805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else {
144905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // seek() leaves the BreakCache positioned at the preceding boundary
145005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                //        if the requested position is between two bounaries.
145105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // current() pushes the BreakCache position out to the BreakIterator itself.
145205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                assert(startPos > fTextIdx);
145305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                current();
145405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
145505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
145605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return;
145705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
145805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
145905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /*
146005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Update the state of the public BreakIterator (fBI) to reflect the
146105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * current state of the break iterator cache (this).
146205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
146305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int current() {
146405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fPosition = fTextIdx;
146505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fRuleStatusIndex = fStatuses[fBufIdx];
146605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fDone = false;
146705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fTextIdx;
146805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
146905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
147005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
147105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Add boundaries to the cache near the specified position.
147205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * The given position need not be a boundary itself.
147305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * The input position must be within the range of the text, and
147405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * on a code point boundary.
147505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * If the requested position is a break boundary, leave the iteration
147605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * position on it.
147705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * If the requested position is not a boundary, leave the iteration
147805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * position on the preceding boundary and include both the
147905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * preceding and following boundaries in the cache.
148005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Additional boundaries, either preceding or following, may be added
148105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * to the cache as a side effect.
148205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
148305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Return false if the operation failed.
148405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
148505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    boolean populateNear(int position) {
148605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
148705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
148805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Find a boundary somewhere in the vicinity of the requested position.
148905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Depending on the safe rules and the text data, it could be either before, at, or after
149005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // the requested position.
149105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
149205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
149305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // If the requested position is not near already cached positions, clear the existing cache,
149405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // find a near-by boundary and begin new cache contents there.
149505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
149605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
149705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int aBoundary = fText.getBeginIndex();
149805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int ruleStatusIndex = 0;
149905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // TODO: check for position == length of text. Although may still need to back up to get rule status.
150005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (position > aBoundary + 20) {
150105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                int backupPos = handlePrevious(position);
150205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fPosition = backupPos;
150305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                aBoundary = handleNext();                // Ignore dictionary, just finding a rule based boundary.
150405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                ruleStatusIndex = fRuleStatusIndex;
150505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
150605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            reset(aBoundary, ruleStatusIndex);               // Reset cache to hold aBoundary as a single starting point.
150705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
150805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
150905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Fill in boundaries between existing cache content and the new requested position.
151005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
151105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fBoundaries[fEndBufIdx] < position) {
151205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // The last position in the cache precedes the requested position.
151305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Add following position(s) to the cache.
151405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            while (fBoundaries[fEndBufIdx] < position) {
151505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (!populateFollowing()) {
151605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    assert false;
151705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    return false;
151805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
151905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
152005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
152105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
152205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
152305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                previous();
152405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
152505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
152605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
152705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
152805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fBoundaries[fStartBufIdx] > position) {
152905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // The first position in the cache is beyond the requested position.
153005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // back up more until we get a boundary <= the requested position.
153105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            while (fBoundaries[fStartBufIdx] > position) {
153205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                populatePreceding();
153305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
153405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
153505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
153605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
153705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                next();
153805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
153905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (fTextIdx > position) {
154005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // If position is not itself a boundary, the next() loop above will overshoot.
154105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Back up one, leaving cache position at the boundary preceding the requested position.
154205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                previous();
154305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
154405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
154505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
154605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
154705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert fTextIdx == position;
154805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return true;
154905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
155005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
155105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
155205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
155305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Add boundary(s) to the cache following the current last boundary.
155405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Return false if at the end of the text, and no more boundaries can be added.
155505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
155605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
155705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    boolean populateFollowing() {
155805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int fromPosition = fBoundaries[fEndBufIdx];
155905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int fromRuleStatusIdx = fStatuses[fEndBufIdx];
156005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int pos = 0;
156105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int ruleStatusIdx = 0;
156205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
156305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fDictionaryCache.following(fromPosition)) {
156405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
156505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
156605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
156705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
156805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fPosition = fromPosition;
156905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        pos = handleNext();
157005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (pos == BreakIterator.DONE) {
157105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return false;
157205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
157305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
157405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        ruleStatusIdx = fRuleStatusIndex;
157505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fDictionaryCharCount > 0) {
157605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // The text segment obtained from the rules includes dictionary characters.
157705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Subdivide it, with subdivided results going into the dictionary cache.
157805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fDictionaryCache.populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
157905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (fDictionaryCache.following(fromPosition)) {
158005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
158105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return true;
158205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // TODO: may want to move a sizable chunk of the dictionary cache to the break cache at this point.
158305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                //       But be careful with interactions with populateNear().
158405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
158505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
158605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
158705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Rule based segment did not include dictionary characters.
158805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
158905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //    meaning that we didn't take the return, above.
159005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Add its end point to the cache.
159105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
159205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
159305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
159405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
159505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        //
159605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        for (int count=0; count<6; ++count) {
159705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            pos = handleNext();
159805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (pos == BreakIterator.DONE || fDictionaryCharCount > 0) {
159905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
160005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
160105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            addFollowing(pos, fRuleStatusIndex, RetainCachePosition);
160205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
160305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return true;
160405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
160505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
160605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
160705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Add one or more boundaries to the cache preceding the first currently cached boundary.
160805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Leave the iteration position on the first added boundary.
160905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Return false if no boundaries could be added (if at the start of the text.)
161005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
161105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    boolean populatePreceding() {
161205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int textBegin = fText.getBeginIndex();
161305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int fromPosition = fBoundaries[fStartBufIdx];
161405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fromPosition == textBegin) {
161505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return false;
161605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
161705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
161805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int position = textBegin;
161905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int positionStatusIdx = 0;
162005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
162105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (fDictionaryCache.preceding(fromPosition)) {
162205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            addPreceding(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition);
162305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
162405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
162505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
162605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int backupPosition = fromPosition;
162705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
162805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Find a boundary somewhere preceding the first already-cached boundary
162905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        do {
163005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            backupPosition = backupPosition - 30;
163105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (backupPosition <= textBegin) {
163205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                backupPosition = textBegin;
163305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else {
163405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                backupPosition = handlePrevious(backupPosition);
163505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
163605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (backupPosition == BreakIterator.DONE || backupPosition == textBegin) {
163705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                position = textBegin;
163805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                positionStatusIdx = 0;
163905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else {
164005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fPosition = backupPosition;  // TODO: pass starting position in a clearer way.
164105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                position = handleNext();
164205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                positionStatusIdx = fRuleStatusIndex;
164305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
164405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
164505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } while (position >= fromPosition);
164605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
164705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Find boundaries between the one we just located and the first already-cached boundary
164805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
164905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
165005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fSideBuffer.removeAllElements();
165105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fSideBuffer.push(position);
165205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fSideBuffer.push(positionStatusIdx);
165305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
165405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        do {
165505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int prevPosition = fPosition = position;
165605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int prevStatusIdx = positionStatusIdx;
165705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            position = handleNext();
165805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            positionStatusIdx = fRuleStatusIndex;
165905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (position == BreakIterator.DONE) {
166005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
166105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
166205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
166305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            boolean segmentHandledByDictionary = false;
166405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (fDictionaryCharCount != 0) {
166505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Segment from the rules includes dictionary characters.
166605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Subdivide it, with subdivided results going into the dictionary cache.
166705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                int dictSegEndPosition = position;
166805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fDictionaryCache.populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
166905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                while (fDictionaryCache.following(prevPosition)) {
167005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    position = fDictionaryCache.fBoundary;
167105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    positionStatusIdx = fDictionaryCache.fStatusIndex;
167205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    segmentHandledByDictionary = true;
167305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    assert(position > prevPosition);
167405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (position >= fromPosition) {
167505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        break;
167605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
167705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    assert(position <= dictSegEndPosition);
167805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    fSideBuffer.push(position);
167905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    fSideBuffer.push(positionStatusIdx);
168005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    prevPosition = position;
168105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
168205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                assert(position==dictSegEndPosition || position>=fromPosition);
168305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
168405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
168505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (!segmentHandledByDictionary && position < fromPosition) {
168605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fSideBuffer.push(position);
168705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fSideBuffer.push(positionStatusIdx);
168805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
168905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } while (position < fromPosition);
169005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
169105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // Move boundaries from the side buffer to the main circular buffer.
169205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        boolean success = false;
169305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (!fSideBuffer.isEmpty()) {
169405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            positionStatusIdx = fSideBuffer.pop();
169505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            position = fSideBuffer.pop();
169605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            addPreceding(position, positionStatusIdx, UpdateCachePosition);
169705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            success = true;
169805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
169905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
170005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        while (!fSideBuffer.isEmpty()) {
170105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            positionStatusIdx = fSideBuffer.pop();
170205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            position = fSideBuffer.pop();
170305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
170405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // No space in circular buffer to hold a new preceding result while
170505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // also retaining the current cache (iteration) position.
170605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Bailing out is safe; the cache will refill again if needed.
170705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
170805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
170905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
171005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return success;
171105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
171205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
171305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
171405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    static final boolean RetainCachePosition = false;
171505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    static final boolean UpdateCachePosition = true;
171605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
171705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /*
171805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Add the boundary following the current position.
171905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * The current position can be left as it was, or changed to the newly added boundary,
172005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * as specified by the update parameter.
172105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
172205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void addFollowing(int position, int ruleStatusIdx, boolean update) {
172305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(position > fBoundaries[fEndBufIdx]);
172405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(ruleStatusIdx <= Short.MAX_VALUE);
172505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int nextIdx = modChunkSize(fEndBufIdx + 1);
172605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (nextIdx == fStartBufIdx) {
172705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
172805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
172905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBoundaries[nextIdx] = position;
173005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStatuses[nextIdx] = (short)ruleStatusIdx;
173105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fEndBufIdx = nextIdx;
173205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (update == UpdateCachePosition) {
173305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Set current position to the newly added boundary.
173405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = nextIdx;
173505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = position;
173605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
173705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Retaining the original cache position.
173805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Check if the added boundary wraps around the buffer, and would over-write the original position.
173905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // It's the responsibility of callers of this function to not add too many.
174005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            assert(nextIdx != fBufIdx);
174105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
174205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
174305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
174405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
174505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
174605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /*
174705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * Add the boundary preceding the current position.
174805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * The current position can be left as it was, or changed to the newly added boundary,
174905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * as specified by the update parameter.
175005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
175105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    boolean addPreceding(int position, int ruleStatusIdx, boolean update) {
175205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(position < fBoundaries[fStartBufIdx]);
175305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(ruleStatusIdx <= Short.MAX_VALUE);
175405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int nextIdx = modChunkSize(fStartBufIdx - 1);
175505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (nextIdx == fEndBufIdx) {
175605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
175705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Failure. The insertion of the new boundary would claim the buffer position that is the
175805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // current iteration position. And we also want to retain the current iteration position.
175905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // (The buffer is already completely full of entries that precede the iteration position.)
176005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return false;
176105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
176205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fEndBufIdx = modChunkSize(fEndBufIdx - 1);
176305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
176405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBoundaries[nextIdx] = position;
176505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStatuses[nextIdx] = (short)ruleStatusIdx;
176605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStartBufIdx = nextIdx;
176705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (update == UpdateCachePosition) {
176805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = nextIdx;
176905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = position;
177005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
177105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return true;
177205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
177305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
177405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
177505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Set the cache position to the specified position, or, if the position
177605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  falls between to cached boundaries, to the preceding boundary.
177705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Fails if the requested position is outside of the range of boundaries currently held by the cache.
177805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  The startPosition must be on a code point boundary.
177905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
178005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  Return true if successful, false if the specified position is after
178105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *  the last cached boundary or before the first.
178205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
178305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    boolean seek(int pos) {
178405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
178505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return false;
178605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
178705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (pos == fBoundaries[fStartBufIdx]) {
178805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Common case: seek(0), from BreakIterator::first()
178905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = fStartBufIdx;
179005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = fBoundaries[fBufIdx];
179105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
179205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
179305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (pos == fBoundaries[fEndBufIdx]) {
179405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fBufIdx = fEndBufIdx;
179505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fTextIdx = fBoundaries[fBufIdx];
179605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
179705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
179805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
179905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int min = fStartBufIdx;
180005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int max = fEndBufIdx;
180105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        while (min != max) {
180205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
180305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            probe = modChunkSize(probe);
180405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (fBoundaries[probe] > pos) {
180505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                max = probe;
180605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else {
180705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                min = modChunkSize(probe + 1);
180805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
180905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
181005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(fBoundaries[max] > pos);
181105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBufIdx = modChunkSize(max - 1);
181205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fTextIdx = fBoundaries[fBufIdx];
181305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        assert(fTextIdx <= pos);
181405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return true;
181505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
181605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
181705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
181805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
181905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /**
182005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * copy constructor, used from RuleBasedBreakIterator.clone().
182105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     *
182205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * @param src
182305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     */
182405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    BreakCache(BreakCache src)  {
182505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStartBufIdx = src.fStartBufIdx;
182605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fEndBufIdx = src.fEndBufIdx;
182705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fTextIdx = src.fTextIdx;
182805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBufIdx = src.fBufIdx;
182905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fBoundaries = src.fBoundaries.clone();
183005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fStatuses = src.fStatuses.clone();
183105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        fSideBuffer = new DictionaryBreakEngine.DequeI();  // Transient, no need to clone contents.
183205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
183305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
183405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    void dumpCache() {
183505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        System.out.printf("fTextIdx:%d   fBufIdx:%d%n", fTextIdx, fBufIdx);
183605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        for (int i=fStartBufIdx; ; i=modChunkSize(i+1)) {
183705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            System.out.printf("%d  %d%n", i, fBoundaries[i]);
183805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (i == fEndBufIdx) {
183905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
184005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
184105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
184205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    };
184305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
184405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private final int   modChunkSize(int index) { return index & (CACHE_SIZE - 1); };
184505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
184605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    static final int CACHE_SIZE = 128;
184705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
184805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
184905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int                 fStartBufIdx;
185005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int                 fEndBufIdx;    // inclusive
185105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
185205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int                 fTextIdx;
185305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int                 fBufIdx;
185405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
185505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int[]               fBoundaries = new int[CACHE_SIZE];
185605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    short[]             fStatuses = new short[CACHE_SIZE];
185705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
185805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    DictionaryBreakEngine.DequeI   fSideBuffer = new DictionaryBreakEngine.DequeI();
185905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert};
186005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
186105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
186205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
186305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1866