/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2005-2016 International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package android.icu.text;

import static android.icu.impl.CharacterIteration.DONE32;
import static android.icu.impl.CharacterIteration.next32;
import static android.icu.impl.CharacterIteration.nextTrail32;
import static android.icu.impl.CharacterIteration.previous32;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.text.CharacterIterator;
import java.util.ArrayList;
import java.util.List;

import android.icu.impl.CharacterIteration;
import android.icu.impl.ICUBinary;
import android.icu.impl.ICUDebug;
import android.icu.impl.Trie2;
import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
import android.icu.lang.UScript;

/**
 * Rule Based Break Iterator
 * This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
381c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 39836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic class RuleBasedBreakIterator extends BreakIterator { 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //======================================================================= 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Constructors & Factories 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //======================================================================= 451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert /** 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * private constructor 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private RuleBasedBreakIterator() { 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fDictionaryCharCount = 0; 51495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert synchronized(gAllBreakEngines) { 52495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert fBreakEngines = new ArrayList<LanguageBreakEngine>(gAllBreakEngines); 53495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Create a break iterator from a precompiled set of break rules. 581c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Creating a break iterator from the binary rules is much faster than 601c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * creating one from source rules. 
611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Binary break iterator rules are not guaranteed to be compatible between 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * different versions of ICU. 651c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param is an input stream supplying the compiled binary rules. 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @throws IOException if there is an error while reading the rules from the InputStream. 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #compileRules(String, OutputStream) 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException { 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleBasedBreakIterator This = new RuleBasedBreakIterator(); 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is)); 731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return This; 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Create a break iterator from a precompiled set of break rules. 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Creating a break iterator from the binary rules is much faster than 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * creating one from source rules. 
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Binary break iterator rules are not guaranteed to be compatible between 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * different versions of ICU. 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param bytes a buffer supplying the compiled binary rules. 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @throws IOException if there is an error while reading the rules from the buffer. 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #compileRules(String, OutputStream) 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 90836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException { 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleBasedBreakIterator This = new RuleBasedBreakIterator(); 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller This.fRData = RBBIDataWrapper.get(bytes); 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return This; 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param rules The break rules to be used. 
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public RuleBasedBreakIterator(String rules) { 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this(); 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ByteArrayOutputStream ruleOS = new ByteArrayOutputStream(); 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compileRules(rules, ruleOS); 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray())); 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ///CLOVER:OFF 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler, 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // causing bogus compiled rules to be produced, but with no compile error raised. 
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuntimeException rte = new RuntimeException("RuleBasedBreakIterator rule compilation internal error: " 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + e.getMessage()); 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw rte; 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ///CLOVER:ON 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //======================================================================= 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Boilerplate 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //======================================================================= 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Clones this iterator. 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return A newly-constructed RuleBasedBreakIterator with the same 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * behavior as this one. 
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 129f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 13005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public Object clone() { 13105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert RuleBasedBreakIterator result; 13205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result = (RuleBasedBreakIterator)super.clone(); 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fText != null) { 1341c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert result.fText = (CharacterIterator)(fText.clone()); 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 136495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert synchronized (gAllBreakEngines) { 137495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert result.fBreakEngines = new ArrayList<LanguageBreakEngine>(gAllBreakEngines); 138495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } 139495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert result.fLookAheadMatches = new LookAheadResults(); 14005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result.fBreakCache = result.new BreakCache(fBreakCache); 14105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result.fDictionaryCache = result.new DictionaryCache(fDictionaryCache); 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if both BreakIterators are of the same class, have the same 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * rules, and iterate over the same text. 
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 150f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean equals(Object that) { 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (that == null) { 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (this == that) { 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RuleBasedBreakIterator other = (RuleBasedBreakIterator) that; 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRData != other.fRData && (fRData == null || other.fRData == null)) { 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (fRData != null && other.fRData != null && 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) { 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fText == null && other.fText == null) { 1681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return true; 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fText == null || other.fText == null || !fText.equals(other.fText)) { 1711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return false; 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fPosition == other.fPosition; 
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller catch(ClassCastException e) { 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the description (rules) used to create this iterator. 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (In ICU4C, the same function is RuleBasedBreakIterator::getRules()) 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String toString() { 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String retStr = ""; 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRData != null) { 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller retStr = fRData.fRuleSource; 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return retStr; 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compute a hashcode for this BreakIterator 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return A hash code 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 197f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int hashCode() 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 2001c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return fRData.fRuleSource.hashCode(); 
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int START_STATE = 1; // The state number of the starting state 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int STOP_STATE = 0; // The state-transition value indicating "stop" 2061c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // RBBIRunMode - the state machine runs an extra iteration at the beginning and end 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // of user text. A variable with this enum type keeps track of where we 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // are. The state machine only fetches user text input while in RUN mode. 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int RBBI_START = 0; 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int RBBI_RUN = 1; 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int RBBI_END = 2; 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The character iterator through which this BreakIterator accesses the text. 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private CharacterIterator fText = new java.text.StringCharacterIterator(""); 2181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The rule data for this BreakIterator instance. Package private. 
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBIDataWrapper fRData; 2231c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 22405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 22505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The iteration state - current position, rule status for the current position, 22605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * and whether the iterator ran off the end, yielding UBRK_DONE. 22705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Current position is pinned to be 0 < position <= text.length. 22805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Current position is always set to a boundary. 22905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 23005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The current position of the iterator. Pinned, 0 < fPosition <= text.length. 23105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Never has the value UBRK_DONE (-1). 23205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 23305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int fPosition; 23405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 23505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 2361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * Index of the Rule {tag} values for the most recent match. 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 23805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int fRuleStatusIndex; 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 24005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 24105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * True when iteration has run off the end, and iterator functions should return UBRK_DONE. 
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 24305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean fDone; 24405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 24505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 24605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Cache of previously determined boundary positions. 24705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 24805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private BreakCache fBreakCache = new BreakCache(); 24905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Counter for the number of characters encountered with the "dictionary" 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * flag set. Normal RBBI iterators don't use it, although the code 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for updating it is live. Dictionary Based break iterators (a subclass 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of us) access this field directly. 
256836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int fDictionaryCharCount; 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 26005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private DictionaryCache fDictionaryCache = new DictionaryCache(); 26105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ICU debug argument name for RBBI 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final String RBBI_DEBUG_ARG = "rbbi"; 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Debugging flag. Trace operation of state machine when true. 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final boolean TRACE = ICUDebug.enabled(RBBI_DEBUG_ARG) 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0; 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 27405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * What kind of break iterator this is. 27505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Defaulting BreakType to word gives reasonable dictionary behavior for 27605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Break Iterators that are built from rules. 
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 27805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int fBreakType = KIND_WORD; 2791c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The "default" break engine - just skips over ranges of dictionary words, 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * producing no breaks. Should only be used if characters need to be handled 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * by a dictionary but we have no dictionary implementation for them. 284495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * 285495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * Only one instance; shared by all break iterators. 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 287495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert private static final UnhandledBreakEngine gUnhandledBreakEngine; 288495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert 289495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert /** 290495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * List of all known break engines, common for all break iterators. 291495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * Lazily updated as break engines are needed, because instantiation of 292495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * break engines is expensive. 293495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * 294495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * Because gAllBreakEngines can be referenced concurrently from different 295495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * BreakIterator instances, all access is synchronized. 
296495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert */ 297495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert private static final List<LanguageBreakEngine> gAllBreakEngines; 298495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert 299495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert static { 300495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gUnhandledBreakEngine = new UnhandledBreakEngine(); 301495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gAllBreakEngines = new ArrayList<LanguageBreakEngine>(); 302495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gAllBreakEngines.add(gUnhandledBreakEngine); 303495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } 3041c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 306495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * List of all known break engines. Similar to gAllBreakEngines, but local to a 307495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert * break iterator, allowing it to be used without synchronization. 308495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert */ 309495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert private List<LanguageBreakEngine> fBreakEngines; 310495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Dump the contents of the state table and character classes for this break iterator. 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For debugging only. 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @deprecated This API is ICU internal only. 
315836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @Deprecated 318f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert public void dump(java.io.PrintStream out) { 319f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (out == null) { 320f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert out = System.out; 321f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 322f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert this.fRData.dump(out); 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compile a set of source break rules into the binary state tables used 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * by the break iterator engine. Creating a break iterator from precompiled 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * rules is much faster than creating one from source rules. 3291c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Binary break rules are not guaranteed to be compatible between different 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * versions of ICU. 3321c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 3331c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param rules The source form of the break rules 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ruleBinary An output stream to receive the compiled rules. 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @throws IOException If there is an error writing the output. 
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #getInstanceFromCompiledRules(InputStream) 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void compileRules(String rules, OutputStream ruleBinary) throws IOException { 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBIRuleBuilder.compileRules(rules, ruleBinary); 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3421c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //======================================================================= 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // BreakIterator overrides 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //======================================================================= 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets the current iteration position to the beginning of the text. 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (i.e., the CharacterIterator's starting offset). 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The offset of the beginning of the text. 
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 352f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int first() { 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fText == null) { 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return BreakIterator.DONE; 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fText.first(); 35805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int start = fText.getIndex(); 35905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!fBreakCache.seek(start)) { 36005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.populateNear(start); 36105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 36205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.current(); 36305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(fPosition == start); 36405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fPosition; 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3661c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets the current iteration position to the end of the text. 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (i.e., the CharacterIterator's ending offset). 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The text's past-the-end offset. 
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 372f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int last() { 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fText == null) { 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return BreakIterator.DONE; 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 37705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int endPos = fText.getEndIndex(); 37805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position. 37905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(endShouldBeBoundary); 38005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fPosition != endPos) { 38105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(fPosition == endPos); 38205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 38305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return endPos; 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3851c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Advances the iterator either forward or backward the specified number of steps. 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Negative values move backward, and positive values move forward. This is 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * equivalent to repeatedly calling next() or previous(). 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param n The number of steps to move. The sign indicates the direction 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (negative is backwards, and positive is forwards). 
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The character offset of the boundary position n boundaries away from 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the current one. 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 395f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int next(int n) { 39705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int result = 0; 39805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (n > 0) { 39905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert for (; n > 0 && result != DONE; --n) { 40005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result = next(); 40105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 40205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if (n < 0) { 40305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert for (; n < 0 && result != DONE; ++n) { 40405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result = previous(); 40505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 40605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 40705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result = current(); 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4111c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Advances the iterator to the next boundary position. 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The position of the first boundary after this one. 
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 416f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int next() { 41805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.next(); 41905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fDone ? DONE : fPosition; 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 42305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Moves the iterator backwards, to the boundary preceding the current one. 42405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @return The position of the boundary position immediately preceding the starting position. 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 426f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int previous() { 42805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.previous(); 42905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fDone ? DONE : fPosition; 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets the iterator to refer to the first boundary position following 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the specified position. 43505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param startPos The position from which to begin searching for a break position. 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The position of the first break after the current position. 
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 438f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 43905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public int following(int startPos) { 44005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // if the supplied position is before the beginning, return the 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // text's starting offset 44205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (startPos < fText.getBeginIndex()) { 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return first(); 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 44605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Move requested offset to a code point start. It might be on a trail surrogate. 44705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Or it may be beyond the end of the text. 44805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert startPos = CISetIndex32(fText, startPos); 44905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.following(startPos); 45005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fDone ? DONE : fPosition; 45105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets the iterator to refer to the last boundary position before the 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified position. 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param offset The position to begin searching for a break from. 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The position of the last boundary before the starting position. 
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 460f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int preceding(int offset) { 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fText == null || offset > fText.getEndIndex()) { 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return last(); 46405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if (offset < fText.getBeginIndex()) { 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return first(); 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 46805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Move requested offset to a code point start. It might be on a trail surrogate. 46905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // int adjustedOffset = CISetIndex32(fText, offset); // TODO: restore to match ICU4C behavior. 47005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int adjustedOffset = offset; 47105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.preceding(adjustedOffset); 47205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fDone ? DONE : fPosition; 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 47605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 47808ae9f2909b2ec37f755dac4372553437e9d7cf6Paul Duffin * Throw IllegalArgumentException unless begin <= offset < end. 
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller protected static final void checkOffset(int offset, CharacterIterator text) { 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (offset < text.getBeginIndex() || offset > text.getEndIndex()) { 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("offset out of bounds"); 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if the specified position is a boundary position. As a side 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * effect, leaves the iterator pointing to the first boundary position at 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or after "offset". 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param offset the offset to check. 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return True if "offset" is a boundary position. 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 494f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isBoundary(int offset) { 49605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // TODO: behavior difference with ICU4C, which considers out-of-range offsets 49705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // to not be boundaries, and to not be errors. 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller checkOffset(offset, fText); 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 50005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Adjust offset to be on a code point boundary and not beyond the end of the text. 
50105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Note that isBoundary() is always be false for offsets that are not on code point boundaries. 50205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // But we still need the side effect of leaving iteration at the following boundary. 50305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int adjustedOffset = CISetIndex32(fText, offset); 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 50505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean result = false; 50605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBreakCache.seek(adjustedOffset) || fBreakCache.populateNear(adjustedOffset)) { 50705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert result = (fBreakCache.current() == offset); 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 51005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!result) { 51105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Not on a boundary. isBoundary() must leave iterator on the following boundary. 51205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // fBreakCache.seek(), above, left us on the preceding boundary, so advance one. 51305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert next(); 51405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 51605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 52005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Returns the current iteration position. 
Note that UBRK_DONE is never 52105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * returned from this function; if iteration has run to the end of a 52205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * string, current() will return the length of the string while 52305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * next() will return BreakIterator.DONE). 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The current iteration position. 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 526f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int current() { 52805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return (fText != null) ? fPosition : BreakIterator.DONE; 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return the status tag from the break rule that determined the most recently 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returned break position. The values appear in the rule source 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * within brackets, {123}, for example. For rules that do not specify a 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * status, a default value of 0 is returned. If more than one rule applies, 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the numerically largest of the possible status values is returned. 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 53905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Of the standard types of ICU break iterators, only the word and line break 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * iterator provides status values. 
The values are defined in 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * class RuleBasedBreakIterator, and allow distinguishing between words 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * that contain alphabetic letters, "words" that appear to be numbers, 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * punctuation and spaces, words containing ideographic characters, and 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * more. Call <code>getRuleStatus</code> after obtaining a boundary 54508ae9f2909b2ec37f755dac4372553437e9d7cf6Paul Duffin * position from <code>next()</code>, <code>previous()</code>, or 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * any other break iterator functions that returns a boundary position. 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the status from the break rule that determined the most recently 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returned break position. 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 552f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getRuleStatus() { 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Status records have this form: 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Count N <-- fLastRuleStatusIndex points here. 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Status val 0 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Status val 1 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ... 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Status val N-1 <-- the value we need to return 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The status values are sorted in ascending order. 
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This function returns the last (largest) of the array of status values. 56205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int idx = fRuleStatusIndex + fRData.fStatusTable[fRuleStatusIndex]; 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int tagVal = fRData.fStatusTable[idx]; 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return tagVal; 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * Get the status (tag) values from the break rule(s) that determined the most 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * recently returned break position. The values appear in the rule source 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * within brackets, {123}, for example. The default status value for rules 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * that do not explicitly provide one is zero. 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The status values used by the standard ICU break rules are defined 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * as public constants in class RuleBasedBreakIterator. 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If the size of the output array is insufficient to hold the data, 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the output will be truncated to the available length. No exception 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * will be thrown. 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 5801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * @param fillInArray an array to be filled in with the status values. 
5811c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * @return The number of rule status values from rules that determined 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the most recent boundary returned by the break iterator. 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In the event that the array is too small, the return value 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is the total number of status values that were available, 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * not the reduced number that were actually returned. 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 587f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getRuleStatusVec(int[] fillInArray) { 58905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int numStatusVals = fRData.fStatusTable[fRuleStatusIndex]; 5901c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (fillInArray != null) { 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numToCopy = Math.min(numStatusVals, fillInArray.length); 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<numToCopy; i++) { 59305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fillInArray[i] = fRData.fStatusTable[fRuleStatusIndex + i + 1]; 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return numStatusVals; 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return a CharacterIterator over the text being analyzed. This version 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of this method returns the actual CharacterIterator we're using internally. 
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Changing the state of this iterator can have undefined consequences. If 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * you need to change it, clone it first. 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return An iterator over the text being analyzed. 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 606f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CharacterIterator getText() { 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fText; 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Set the iterator to analyze a new piece of text. This function resets 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the current iteration position to the beginning of the text. 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param newText An iterator over the text to analyze. 
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 616f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void setText(CharacterIterator newText) { 61805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (newText != null) { 61905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.reset(newText.getBeginIndex(), 0); 62005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 62105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreakCache.reset(); 62205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 62305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDictionaryCache.reset(); 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fText = newText; 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller this.first(); 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * package private 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void setBreakType(int type) { 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fBreakType = type; 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * package private 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getBreakType() { 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fBreakType; 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 
Control debug, trace and dump options. 644836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide draft / provisional / internal are hidden on Android 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final String fDebugEnv = ICUDebug.enabled(RBBI_DEBUG_ARG) ? 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ICUDebug.value(RBBI_DEBUG_ARG) : null; 6481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 6491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private LanguageBreakEngine getLanguageBreakEngine(int c) { 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have a dictionary character. 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Does an already instantiated break engine handle it? 654495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert for (LanguageBreakEngine candidate : fBreakEngines) { 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (candidate.handles(c, fBreakType)) { 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return candidate; 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 660495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert synchronized (gAllBreakEngines) { 661495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert // This break iterator's list of break engines didn't handle the character. 662495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert // Check the global list, another break iterator may have instantiated the 663495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert // desired engine. 
664495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert for (LanguageBreakEngine candidate : gAllBreakEngines) { 665495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert if (candidate.handles(c, fBreakType)) { 666495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert fBreakEngines.add(candidate); 667495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert return candidate; 668495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } 669495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } 670495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert 671495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert // The global list doesn't have an existing engine, build one. 672495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT); 673495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert if (script == UScript.KATAKANA || script == UScript.HIRAGANA) { 674495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert // Katakana, Hiragana and Han are handled by the same dictionary engine. 675495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert // Fold them together for mapping from script -> engine. 
676495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert script = UScript.HAN; 677495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } 6781c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 679495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert LanguageBreakEngine eng; 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (script) { 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UScript.THAI: 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = new ThaiBreakEngine(); 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UScript.LAO: 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = new LaoBreakEngine(); 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UScript.MYANMAR: 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = new BurmeseBreakEngine(); 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UScript.KHMER: 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = new KhmerBreakEngine(); 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UScript.HAN: 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (getBreakType() == KIND_WORD) { 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = new CjkBreakEngine(false); 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 699495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gUnhandledBreakEngine.handleChar(c, getBreakType()); 700495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert eng = gUnhandledBreakEngine; 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UScript.HANGUL: 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (getBreakType() == KIND_WORD) { 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = new CjkBreakEngine(true); 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 707495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gUnhandledBreakEngine.handleChar(c, getBreakType()); 708495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert eng = gUnhandledBreakEngine; 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 712495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gUnhandledBreakEngine.handleChar(c, getBreakType()); 713495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert eng = gUnhandledBreakEngine; 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller eng = null; 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 720495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert if (eng != null && eng != gUnhandledBreakEngine) { 721495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert gAllBreakEngines.add(eng); 722495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert fBreakEngines.add(eng); 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 724495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert return eng; 725495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert } // end synchronized(gAllBreakEngines) 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert private static final int kMaxLookaheads = 
8; 7291c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert private static class LookAheadResults { 7301c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int fUsedSlotLimit; 7311c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int[] fPositions; 7321c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int[] fKeys; 7331c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 7341c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert LookAheadResults() { 7351c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fUsedSlotLimit= 0; 7361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fPositions = new int[kMaxLookaheads]; 7371c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fKeys = new int[kMaxLookaheads]; 7381c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7391c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 7401c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int getPosition(int key) { 7411c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert for (int i=0; i<fUsedSlotLimit; ++i) { 7421c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (fKeys[i] == key) { 7431c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return fPositions[i]; 7441c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert assert(false); 7471c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return -1; 7481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 7501c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert void setPosition(int key, int position) { 7511c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int i; 7521c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert for (i=0; i<fUsedSlotLimit; ++i) { 7531c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (fKeys[i] == key) { 7541c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 
fPositions[i] = position; 7551c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return; 7561c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7571c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7581c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (i >= kMaxLookaheads) { 7591c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert assert(false); 7601c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert i = kMaxLookaheads - 1; 7611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7621c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fKeys[i] = key; 7631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fPositions[i] = position; 7641c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert assert(fUsedSlotLimit == i); 7651c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fUsedSlotLimit = i + 1; 7661c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 7681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert void reset() { 7691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fUsedSlotLimit = 0; 7701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 7711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert }; 7721c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert private LookAheadResults fLookAheadMatches = new LookAheadResults(); 7731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The State Machine Engine for moving forward is here. 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This function is the heart of the RBBI run time engine. 
     *
     * Input
     *      fPosition:            the position in the text to begin from.
     * Output
     *      fPosition:            the boundary following the starting position.
     *      fDictionaryCharCount: the number of dictionary characters encountered.
     *                            If > 0, the segment will be further subdivided.
     *      fRuleStatusIndex:     info from the state table indicating which rules caused the boundary.
     *
     * @return the new iterator position
     *
     * A note on supplementary characters and the position of the underlying
     * Java CharacterIterator: Normally, a character iterator is positioned at
     * the char most recently returned by next(). Within this function, when
     * a supplementary char is being processed, the char iterator is left
     * sitting on the trail surrogate, in the middle of the code point.
     * This is different from everywhere else, where an iterator always
     * points at the lead surrogate of a supplementary.
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 79705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int handleNext() { 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("Handle Next pos char state category"); 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 80205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // handleNext always sets the break tag value. 80305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Set the default for it. 80405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = 0; 80505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDictionaryCharCount = 0; 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // caches for quicker access 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharacterIterator text = fText; 80905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert Trie2 trie = fRData.fTrie; 81005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 81105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert short[] stateTable = fRData.fFTable; 81205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int initialPosition = fPosition; 81305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert text.setIndex(initialPosition); 81405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int result = initialPosition; 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set up the starting char 81705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int c = text.current(); 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) { 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = nextTrail32(text, c); 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == 
DONE32) { 82105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDone = true; 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return BreakIterator.DONE; 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set the initial state for the state machine 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int state = START_STATE; 8281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int row = fRData.getRowIndex(state); 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short category = 3; 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int flagsState = fRData.getStateTableFlags(stateTable); 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int mode = RBBI_RUN; 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) { 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller category = 2; 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = RBBI_START; 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 8361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert System.out.print(" " + RBBIDataWrapper.intToString(text.getIndex(), 5)); 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print(RBBIDataWrapper.intToHexString(c, 10)); 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6)); 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8411c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fLookAheadMatches.reset(); 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // loop until we reach the end of the text or transition to state 0 
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (state != STOP_STATE) { 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == DONE32) { 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Reached end of input string. 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode == RBBI_END) { 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have already run the loop one last time with the 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // character set to the pseudo {eof} value. Now it is time 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to unconditionally bail out. 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Run the loop one last time with the fake end-of-input character category 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = RBBI_END; 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller category = 1; 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (mode == RBBI_RUN) { 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Get the char category. An incoming category of 1 or 2 mens that 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we are preset for doing the beginning or end of input, and 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that we shouldn't get a category from an actual text input character. 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // look up the current character's character category, which tells us 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // which column in the state table to look at. 
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 86605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert category = (short) trie.get(c); 8671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check the dictionary bit in the character's category. 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Counter is only used by dictionary based iterators (subclasses). 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Chars that need to be handled by a dictionary have a flag bit set 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in their category values. 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((category & 0x4000) != 0) { 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fDictionaryCharCount++; 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // And off the dictionary flag bit. 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller category &= ~0x4000; 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 8801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert System.out.print(" " + RBBIDataWrapper.intToString(text.getIndex(), 5)); 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print(RBBIDataWrapper.intToHexString(c, 10)); 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6)); 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8851c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Advance to the next character. 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If this is a beginning-of-input loop iteration, don't advance. 
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The next iteration will be processing the first real input character. 888f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert c = text.next(); 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) { 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = nextTrail32(text, c); 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = RBBI_RUN; 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // look up a state transition in the state table 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category]; 8991c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert row = fRData.getRowIndex(state); 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) { 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Match found, common case 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = text.getIndex(); 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) { 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The iterator has been left in the middle of a surrogate pair. 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We want the start of it. 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result--; 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Remember the break status (tag) values. 
91105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX]; 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9141c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING]; 9151c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (completedRule > 0) { 9161c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Lookahead match is completed 9171c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int lookaheadResult = fLookAheadMatches.getPosition(completedRule); 9181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (lookaheadResult >= 0) { 91905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX]; 92005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = lookaheadResult; 9211c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert return lookaheadResult; 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9231c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9251c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD]; 9261c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (rule != 0) { 9271c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // At the position of a '/' in a look-ahead match. Record it. 9281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int pos = text.getIndex(); 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) { 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The iterator has been left in the middle of a surrogate pair. 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We want the beginning of it. 
9321c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert pos--; 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9341c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fLookAheadMatches.setPosition(rule, pos); 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9371c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } // End of state machine main loop 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The state machine is done. Check whether it found a match... 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the iterator failed to advance in the match engine force it ahead by one. 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This indicates a defect in the break rules, which should always match 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // at least one character. 9451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (result == initialPosition) { 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("Iterator did not move. Advancing by 1."); 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller text.setIndex(initialPosition); 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller next32(text); 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = text.getIndex(); 95305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = 0; 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 95505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 95605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Leave the iterator at our result position. 
95705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // (we may have advanced beyond the last accepting position chasing after 95805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // longer matches that never completed.) 95905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = result; 96005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("result = " + result); 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 96705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 96805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules. 96905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * This locates a "Safe Position" from which the forward break rules 97005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * will operate correctly. A Safe Position is not necessarily a boundary itself. 97105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 97205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The logic of this function is very similar to handleNext(), above. 97305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 97405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param fromPosition the position in the input text to begin the iteration. 
97505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @hide draft / provisional / internal are hidden on Android 97605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 97705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int handlePrevious(int fromPosition) { 97805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fText == null) { 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9811c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int state; 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int category = 0; 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int mode; 9851c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int row; 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result = 0; 98805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int initialPosition = fromPosition; 9891c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fLookAheadMatches.reset(); 99005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert short[] stateTable = fRData.fSRTable; 99105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert CISetIndex32(fText, fromPosition); 99205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fromPosition == fText.getBeginIndex()) { 99305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return BreakIterator.DONE; 99405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 9951c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // set up the starting char 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = initialPosition; 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = previous32(fText); 9991c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set 
up the initial state for the state machine 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state = START_STATE; 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller row = fRData.getRowIndex(state); 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller category = 3; // TODO: obsolete? from the old start/run mode scheme? 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = RBBI_RUN; 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) { 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller category = 2; 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = RBBI_START; 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10091c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("Handle Prev pos char state category "); 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10131c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // loop until we reach the beginning of the text or transition to state 0 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mainLoop: for (;;) { 10171c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (c == DONE32) { 10181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Reached end of input string. 101905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (mode == RBBI_END) { 102005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // We have already done the {eof} iteration. Now is the time 10211c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // to unconditionally bail out. 
10221c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert break mainLoop; 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10241c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert mode = RBBI_END; 10251c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert category = 1; 10261c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 10271c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10281c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (mode == RBBI_RUN) { 10291c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // look up the current character's category, which tells us 10301c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // which column in the state table to look at. 10311c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // 103205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // And off the dictionary flag bit. For reverse iteration it is not used. 103305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert category = (short) fRData.fTrie.get(c); 103405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert category &= ~0x4000; 10351c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 10361c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10371c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (TRACE) { 10381c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert System.out.print(" " + fText.getIndex() + " "); 10391c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (0x20 <= c && c < 0x7f) { 10401c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert System.out.print(" " + c + " "); 10411c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } else { 10421c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert System.out.print(" " + Integer.toHexString(c) + " "); 10431c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 10441c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert System.out.println(" " + state + " " + category + " "); 10451c8a530973739aafa823d758240d2cd5dad96fe3Fredrik 
Roubert } 10461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10471c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // State Transition - move machine to its next state 10481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // 10491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category]; 10501c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert row = fRData.getRowIndex(state); 10511c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10521c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) { 10531c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Match found, common case, could have lookahead so we move 10541c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // on to check it 10551c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert result = fText.getIndex(); 10561c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 10571c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10581c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10591c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING]; 10601c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (completedRule > 0) { 10611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Lookahead match is completed. 
10621c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int lookaheadResult = fLookAheadMatches.getPosition(completedRule); 10631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (lookaheadResult >= 0) { 10641c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert result = lookaheadResult; 10651c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert break mainLoop; 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 10681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD]; 10691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (rule != 0) { 10701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // At the position of a '/' in a look-ahead match. Record it. 10711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert int pos = fText.getIndex(); 10721c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fLookAheadMatches.setPosition(rule, pos); 10731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 10741c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (state == STOP_STATE) { 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Normal loop exit is here 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break mainLoop; 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10791c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // then move iterator position backwards one character 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode == RBBI_RUN) { 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = previous32(fText); 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (mode == RBBI_START) { 
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mode = RBBI_RUN; 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10891c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10901c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } // End of the main loop. 10921c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The state machine is done. Check whether it found a match... 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 109505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // If the iterator failed to move in the match engine, force it back by one code point. 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (This really indicates a defect in the break rules. They should always match 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // at least one character.) 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (result == initialPosition) { 109905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert CISetIndex32(fText, initialPosition); 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller previous32(fText); 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = fText.getIndex(); 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11031c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (TRACE) { 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("Result = " + result); 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11071c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 111005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 111105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 
111205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Set the index of a CharacterIterator. 111305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Pin the index to the valid range range of BeginIndex <= index <= EndIndex. 111405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * If the index points to a trail surrogate of a supplementary character, adjust it 111505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * to the start (lead surrogate) index. 111605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 111705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param ci A CharacterIterator to set 111805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param index the index to set 111905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @return the resulting index, possibly pinned or adjusted. 112005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 112105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private static int CISetIndex32(CharacterIterator ci, int index) { 112205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (index <= ci.getBeginIndex()) { 112305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ci.first(); 112405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if (index >= ci.getEndIndex()) { 112505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ci.setIndex(ci.getEndIndex()); 112605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if (Character.isLowSurrogate(ci.setIndex(index))) { 112705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!Character.isHighSurrogate(ci.previous())) { 112805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ci.next(); 112905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 113005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 113105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return ci.getIndex(); 113205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 
113305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 113405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /* DictionaryCache stores the boundaries obtained from a run of dictionary characters. 113505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Dictionary boundaries are moved first to this cache, then from here 113605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * to the main BreakCache, where they may inter-leave with non-dictionary 113705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * boundaries. The public BreakIterator API always fetches directly 113805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * from the main BreakCache, not from here. 113905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 114005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * In common situations, the number of boundaries in a single dictionary run 114105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * should be quite small, it will be terminated by punctuation, spaces, 114205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * or any other non-dictionary characters. The main BreakCache may end 114305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * up with boundaries from multiple dictionary based runs. 114405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 114505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The boundaries are stored in a simple ArrayList (vector), with the 114605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * assumption that they will be accessed sequentially. 
114705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 114805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert class DictionaryCache { 114905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 115005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void reset() { 115105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 115205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStart = 0; 115305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fLimit = 0; 115405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fFirstRuleStatusIndex = 0; 115505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fOtherRuleStatusIndex = 0; 115605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreaks.removeAllElements(); 115705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 115805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 115905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean following(int fromPos) { 116005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fromPos >= fLimit || fromPos < fStart) { 116105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 116205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 116305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 116405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 116505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Sequential iteration, move from previous boundary to the following 116605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 116705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int r = 0; 116805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) { 116905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ++fPositionInCache; 117005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fPositionInCache >= fBreaks.size()) { 
117105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 117205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 117305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 117405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert r = fBreaks.elementAt(fPositionInCache); 117505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(r > fromPos); 117605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundary = r; 117705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatusIndex = fOtherRuleStatusIndex; 117805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 117905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 118005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 118105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Random indexing. Linear search for the boundary following the given position. 118205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 118305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) { 118405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert r= fBreaks.elementAt(fPositionInCache); 118505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (r > fromPos) { 118605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundary = r; 118705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatusIndex = fOtherRuleStatusIndex; 118805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 118905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 119005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 119105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 119205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Internal error. fStart <= fromPos < fLimit, but no cached boundary. 
119305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(false); 119405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 119505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 119605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 119705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 119805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean preceding(int fromPos) { 119905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fromPos <= fStart || fromPos > fLimit) { 120005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 120105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 120205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 120305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 120405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fromPos == fLimit) { 120505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = fBreaks.size() - 1; 120605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fPositionInCache >= 0) { 120705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(fBreaks.elementAt(fPositionInCache) == fromPos); 120805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 120905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 121005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 121105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int r; 121205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAt(fPositionInCache) == fromPos) { 121305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert --fPositionInCache; 121405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert r = fBreaks.elementAt(fPositionInCache); 121505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(r < fromPos); 121605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundary = r; 
121705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; 121805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 121905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 122005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 122105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fPositionInCache == 0) { 122205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 122305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 122405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 122505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 122605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { 122705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert r = fBreaks.elementAt(fPositionInCache); 122805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (r < fromPos) { 122905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundary = r; 123005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatusIndex = ( r == fStart) ? 
fFirstRuleStatusIndex : fOtherRuleStatusIndex; 123105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 123205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 123305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 123405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(false); 123505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 123605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 123705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 123805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 123905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 124005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Populate the cache with the dictionary based boundaries within a region of text. 124105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param startPos The start position of a range of text 124205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param endPos The end position of a range of text 124305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param firstRuleStatus The rule status index that applies to the break at startPos 124405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param otherRuleStatus The rule status index that applies to boundaries other than startPos 124505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @hide draft / provisional / internal are hidden on Android 124605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 124705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void populateDictionary(int startPos, int endPos, 124805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int firstRuleStatus, int otherRuleStatus) { 124905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if ((endPos - startPos) <= 1) { 125005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return; 125105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 
125205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 125305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert reset(); 125405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fFirstRuleStatusIndex = firstRuleStatus; 125505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fOtherRuleStatusIndex = otherRuleStatus; 125605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 125705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int rangeStart = startPos; 125805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int rangeEnd = endPos; 125905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 126005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int category; 126105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int current; 126205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int foundBreakCount = 0; 126305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 126405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Loop through the text, looking for ranges of dictionary characters. 126505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // For each span, find the appropriate break engine, and ask it to find 126605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // any breaks within the span. 
126705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 126805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fText.setIndex(rangeStart); 126905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int c = CharacterIteration.current32(fText); 127005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert category = (short)fRData.fTrie.get(c); 127105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 127205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while(true) { 127305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while((current = fText.getIndex()) < rangeEnd && (category & 0x4000) == 0) { 127405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert c = CharacterIteration.next32(fText); // pre-increment 127505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert category = (short)fRData.fTrie.get(c); 127605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 127705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (current >= rangeEnd) { 127805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 127905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 128005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 128105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // We now have a dictionary character. Get the appropriate language object 128205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // to deal with it. 128305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert LanguageBreakEngine lbe = getLanguageBreakEngine(c); 128405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 128505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Ask the language object if there are any breaks. It will add them to the cache and 128605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // leave the text pointer on the other side of its range, ready to search for the next one. 
128705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (lbe != null) { 128805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert foundBreakCount += lbe.findBreaks(fText, rangeStart, rangeEnd, fBreakType, fBreaks); 128905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 129005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 129105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Reload the loop variables for the next go-round 129205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert c = CharacterIteration.current32(fText); 129305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert category = (short)fRData.fTrie.get(c); 129405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 129505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 129605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // If we found breaks, ensure that the first and last entries are 129705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // the original starting and ending position. And initialize the 129805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // cache iteration position to the first entry. 129905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 130005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // System.out.printf("foundBreakCount = %d%n", foundBreakCount); 130105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (foundBreakCount > 0) { 130205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(foundBreakCount == fBreaks.size()); 130305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (startPos < fBreaks.elementAt(0)) { 130405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // The dictionary did not place a boundary at the start of the segment of text. 130505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Add one now. 
This should not commonly happen, but it would be easy for interactions 130605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // of the rules for dictionary segments and the break engine implementations to 130705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // inadvertently cause it. Cover it here, just in case. 130805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreaks.offer(startPos); 130905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 131005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (endPos > fBreaks.peek()) { 131105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreaks.push(endPos); 131205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 131305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = 0; 131405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Note: Dictionary matching may extend beyond the original limit. 131505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStart = fBreaks.elementAt(0); 131605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fLimit = fBreaks.peek(); 131705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 131805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // there were no language-based breaks, even though the segment contained 131905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache 132005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // for this range will fail, and the calling code will fall back to the rule based boundaries. 
132105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 132205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 132305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 132405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 132505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 132605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert DictionaryCache() { 132705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = -1; 132805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreaks = new DictionaryBreakEngine.DequeI(); 132905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 133005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 133105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 133205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * copy constructor. Used by RuleBasedBreakIterator.clone(). 133305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 133405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param src the source object to be copied. 
133505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 133605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert DictionaryCache(DictionaryCache src) { 133705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert try { 133805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBreaks = (DictionaryBreakEngine.DequeI)src.fBreaks.clone(); 133905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 134005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert catch (CloneNotSupportedException e) { 134105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert throw new RuntimeException(e); 134205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 134305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPositionInCache = src.fPositionInCache; 134405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStart = src.fStart; 134505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fLimit = src.fLimit; 134605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fFirstRuleStatusIndex = src.fFirstRuleStatusIndex; 134705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fOtherRuleStatusIndex = src.fOtherRuleStatusIndex; 134805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundary = src.fBoundary; 134905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatusIndex = src.fStatusIndex; 135005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 135105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 135205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // A data structure containing the boundaries themselves. Essentially a vector of raw ints. 135305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert DictionaryBreakEngine.DequeI fBreaks; 135405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fPositionInCache; // Index in fBreaks of last boundary returned by following() 135505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // // or preceding(). Optimizes sequential access. 
135605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fStart; // Text position of first boundary in cache. 135705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fLimit; // Last boundary in cache. Which is the limit of the 135805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // // text segment being handled by the dictionary. 135905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fFirstRuleStatusIndex; // Rule status info for first boundary. 136005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries. 136105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fBoundary; // Current boundary. Set by preceding(), following(). 136205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fStatusIndex; // Current rule status index. Set by preceding, following(). 136305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 136405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 136505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 136605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 136705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 136805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert/* 136905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * class BreakCache 137005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 137105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Cache of break boundary positions and rule status values. 137205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Break iterator API functions, next(), previous(), etc., will use cached results 137305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * when possible, and otherwise cache new results as they are obtained. 137405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 137505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Uniformly caches both dictionary and rule based (non-dictionary) boundaries. 
137605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 137705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The cache is implemented as a single circular buffer. 137805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 137905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 138005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert/* 138105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * size of the circular cache buffer. 138205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 138305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 138405fa7802d0874812c234a29745586677ee5837eaFredrik Roubertclass BreakCache { 138505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 138605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert BreakCache() { 138705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert reset(); 138805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 138905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 139005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void reset(int pos, int ruleStatus) { 139105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStartBufIdx = 0; 139205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fEndBufIdx = 0; 139305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = pos; 139405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = 0; 139505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundaries[0] = pos; 139605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatuses[0] = (short)ruleStatus; 139705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 139805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 139905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void reset() {reset(0, 0); }; 140005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 140105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void next() { 140205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBufIdx == fEndBufIdx) { 
140305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDone = !populateFollowing(); 140405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = fTextIdx; 140505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = fStatuses[fBufIdx]; 140605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 140705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = modChunkSize(fBufIdx + 1); 140805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fPosition = fBoundaries[fBufIdx]; 140905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = fStatuses[fBufIdx]; 141005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 141105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 141205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 141305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void previous() { 141405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int initialBufIdx = fBufIdx; 141505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBufIdx == fStartBufIdx) { 141605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // At start of cache. Prepend to it. 
141705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert populatePreceding(); 141805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 141905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Cache already holds the next boundary 142005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = modChunkSize(fBufIdx - 1); 142105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fBoundaries[fBufIdx]; 142205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 142305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDone = (fBufIdx == initialBufIdx); 142405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = fTextIdx; 142505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = fStatuses[fBufIdx]; 142605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return; 142705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 142805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 142905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Move the iteration state to the position following the startPosition. 143005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Input position must be pinned to the input length. 143105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void following(int startPos) { 143205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (startPos == fTextIdx || seek(startPos) || populateNear(startPos)) { 143305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // startPos is in the cache. Do a next() from that position. 143405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // TODO: an awkward set of interactions with bi->fDone 143505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // seek() does not clear it; it can't because of interactions with populateNear(). 143605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // next() does not clear it in the fast-path case, where everything matters. Maybe it should. 
143705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end. 143805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDone = false; 143905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert next(); 144005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 144105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 144205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 144305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 144405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void preceding(int startPos) { 144505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (startPos == fTextIdx || seek(startPos) || populateNear(startPos)) { 144605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (startPos == fTextIdx) { 144705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert previous(); 144805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 144905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // seek() leaves the BreakCache positioned at the preceding boundary 145005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // if the requested position is between two bounaries. 145105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // current() pushes the BreakCache position out to the BreakIterator itself. 
145205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(startPos > fTextIdx); 145305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert current(); 145405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 145505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 145605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return; 145705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 145805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 145905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /* 146005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Update the state of the public BreakIterator (fBI) to reflect the 146105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * current state of the break iterator cache (this). 146205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 146305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int current() { 146405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = fTextIdx; 146505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleStatusIndex = fStatuses[fBufIdx]; 146605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDone = false; 146705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return fTextIdx; 146805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 146905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 147005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 147105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Add boundaries to the cache near the specified position. 147205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The given position need not be a boundary itself. 147305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The input position must be within the range of the text, and 147405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * on a code point boundary. 
147505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * If the requested position is a break boundary, leave the iteration 147605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * position on it. 147705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * If the requested position is not a boundary, leave the iteration 147805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * position on the preceding boundary and include both the the 147905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * preceding and following boundaries in the cache. 148005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Additional boundaries, either preceding or following, may be added 148105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * to the cache as a side effect. 148205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 148305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Return false if the operation failed. 148405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 148505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean populateNear(int position) { 148605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]); 148705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 148805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Find a boundary somewhere in the vicinity of the requested position. 148905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Depending on the safe rules and the text data, it could be either before, at, or after 149005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // the requested position. 
149105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 149205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 149305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // If the requested position is not near already cached positions, clear the existing cache, 149405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // find a near-by boundary and begin new cache contents there. 149505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 149605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) { 149705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int aBoundary = fText.getBeginIndex(); 149805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int ruleStatusIndex = 0; 149905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // TODO: check for position == length of text. Although may still need to back up to get rule status. 150005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (position > aBoundary + 20) { 150105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int backupPos = handlePrevious(position); 150205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = backupPos; 150305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert aBoundary = handleNext(); // Ignore dictionary, just finding a rule based boundary. 150405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ruleStatusIndex = fRuleStatusIndex; 150505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 150605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point. 150705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 150805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 150905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Fill in boundaries between existing cache content and the new requested position. 
151005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 151105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBoundaries[fEndBufIdx] < position) { 151205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // The last position in the cache precedes the requested position. 151305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Add following position(s) to the cache. 151405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (fBoundaries[fEndBufIdx] < position) { 151505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!populateFollowing()) { 151605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert false; 151705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 151805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 151905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 152005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer. 152105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries. 152205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos. 152305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert previous(); 152405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 152505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 152605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 152705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 152805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBoundaries[fStartBufIdx] > position) { 152905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // The first position in the cache is beyond the requested position. 153005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // back up more until we get a boundary <= the requested position. 
153105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (fBoundaries[fStartBufIdx] > position) { 153205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert populatePreceding(); 153305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 153405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer. 153505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries. 153605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (fTextIdx < position) { // Move forwards to a position at or following the requested pos. 153705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert next(); 153805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 153905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fTextIdx > position) { 154005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // If position is not itself a boundary, the next() loop above will overshoot. 154105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Back up one, leaving cache position at the boundary preceding the requested position. 
154205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert previous(); 154305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 154405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 154505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 154605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 154705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert fTextIdx == position; 154805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 154905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 155005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 155105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 155205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 155305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Add boundary(s) to the cache following the current last boundary. 155405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Return false if at the end of the text, and no more boundaries can be added. 155505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added. 
155605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 155705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean populateFollowing() { 155805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fromPosition = fBoundaries[fEndBufIdx]; 155905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fromRuleStatusIdx = fStatuses[fEndBufIdx]; 156005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int pos = 0; 156105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int ruleStatusIdx = 0; 156205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 156305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fDictionaryCache.following(fromPosition)) { 156405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition); 156505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 156605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 156705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 156805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = fromPosition; 156905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert pos = handleNext(); 157005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (pos == BreakIterator.DONE) { 157105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 157205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 157305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 157405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ruleStatusIdx = fRuleStatusIndex; 157505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fDictionaryCharCount > 0) { 157605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // The text segment obtained from the rules includes dictionary characters. 157705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Subdivide it, with subdivided results going into the dictionary cache. 
157805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDictionaryCache.populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx); 157905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fDictionaryCache.following(fromPosition)) { 158005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert addFollowing(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition); 158105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 158205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // TODO: may want to move a sizable chunk of the dictionary cache to the break cache at this point. 158305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // But be careful with interactions with populateNear(). 158405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 158505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 158605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 158705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Rule based segment did not include dictionary characters. 158805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them, 158905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // meaning that we didn't take the return, above. 159005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Add its end point to the cache. 159105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert addFollowing(pos, ruleStatusIdx, UpdateCachePosition); 159205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 159305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Add several non-dictionary boundaries at this point, to optimize straight forward iteration. 159405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // (subsequent calls to BreakIterator::next() will take the fast path, getting cached results. 
159505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // 159605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert for (int count=0; count<6; ++count) { 159705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert pos = handleNext(); 159805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (pos == BreakIterator.DONE || fDictionaryCharCount > 0) { 159905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 160005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 160105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert addFollowing(pos, fRuleStatusIndex, RetainCachePosition); 160205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 160305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 160405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 160505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 160605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 160705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Add one or more boundaries to the cache preceding the first currently cached boundary. 160805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Leave the iteration position on the first added boundary. 160905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Return false if no boundaries could be added (if at the start of the text.) 
161005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 161105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean populatePreceding() { 161205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int textBegin = fText.getBeginIndex(); 161305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fromPosition = fBoundaries[fStartBufIdx]; 161405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fromPosition == textBegin) { 161505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 161605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 161705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 161805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int position = textBegin; 161905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int positionStatusIdx = 0; 162005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 162105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fDictionaryCache.preceding(fromPosition)) { 162205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert addPreceding(fDictionaryCache.fBoundary, fDictionaryCache.fStatusIndex, UpdateCachePosition); 162305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 162405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 162505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 162605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int backupPosition = fromPosition; 162705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 162805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Find a boundary somewhere preceding the first already-cached boundary 162905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert do { 163005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert backupPosition = backupPosition - 30; 163105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (backupPosition <= textBegin) { 163205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert backupPosition = textBegin; 
163305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 163405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert backupPosition = handlePrevious(backupPosition); 163505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 163605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (backupPosition == BreakIterator.DONE || backupPosition == textBegin) { 163705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert position = textBegin; 163805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert positionStatusIdx = 0; 163905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 164005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fPosition = backupPosition; // TODO: pass starting position in a clearer way. 164105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert position = handleNext(); 164205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert positionStatusIdx = fRuleStatusIndex; 164305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 164405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 164505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } while (position >= fromPosition); 164605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 164705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Find boundaries between the one we just located and the first already-cached boundary 164805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.. 
164905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 165005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.removeAllElements(); 165105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.push(position); 165205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.push(positionStatusIdx); 165305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 165405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert do { 165505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int prevPosition = fPosition = position; 165605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int prevStatusIdx = positionStatusIdx; 165705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert position = handleNext(); 165805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert positionStatusIdx = fRuleStatusIndex; 165905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (position == BreakIterator.DONE) { 166005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 166105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 166205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 166305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean segmentHandledByDictionary = false; 166405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fDictionaryCharCount != 0) { 166505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Segment from the rules includes dictionary characters. 166605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Subdivide it, with subdivided results going into the dictionary cache. 
166705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int dictSegEndPosition = position; 166805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fDictionaryCache.populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx); 166905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (fDictionaryCache.following(prevPosition)) { 167005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert position = fDictionaryCache.fBoundary; 167105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert positionStatusIdx = fDictionaryCache.fStatusIndex; 167205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert segmentHandledByDictionary = true; 167305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(position > prevPosition); 167405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (position >= fromPosition) { 167505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 167605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 167705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(position <= dictSegEndPosition); 167805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.push(position); 167905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.push(positionStatusIdx); 168005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert prevPosition = position; 168105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 168205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(position==dictSegEndPosition || position>=fromPosition); 168305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 168405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 168505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!segmentHandledByDictionary && position < fromPosition) { 168605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.push(position); 168705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer.push(positionStatusIdx); 
168805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 168905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } while (position < fromPosition); 169005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 169105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Move boundaries from the side buffer to the main circular buffer. 169205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean success = false; 169305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!fSideBuffer.isEmpty()) { 169405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert positionStatusIdx = fSideBuffer.pop(); 169505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert position = fSideBuffer.pop(); 169605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert addPreceding(position, positionStatusIdx, UpdateCachePosition); 169705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert success = true; 169805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 169905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 170005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (!fSideBuffer.isEmpty()) { 170105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert positionStatusIdx = fSideBuffer.pop(); 170205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert position = fSideBuffer.pop(); 170305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) { 170405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // No space in circular buffer to hold a new preceding result while 170505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // also retaining the current cache (iteration) position. 170605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Bailing out is safe; the cache will refill again if needed. 
170705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 170805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 170905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 171005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return success; 171105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 171205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 171305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 171405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert static final boolean RetainCachePosition = false; 171505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert static final boolean UpdateCachePosition = true; 171605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 171705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /* 171805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Add the boundary following the current position. 171905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The current position can be left as it was, or changed to the newly added boundary, 172005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * as specified by the update parameter. 172105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 172205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void addFollowing(int position, int ruleStatusIdx, boolean update) { 172305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(position > fBoundaries[fEndBufIdx]); 172405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(ruleStatusIdx <= Short.MAX_VALUE); 172505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int nextIdx = modChunkSize(fEndBufIdx + 1); 172605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (nextIdx == fStartBufIdx) { 172705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1. 
172805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 172905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundaries[nextIdx] = position; 173005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatuses[nextIdx] = (short)ruleStatusIdx; 173105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fEndBufIdx = nextIdx; 173205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (update == UpdateCachePosition) { 173305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Set current position to the newly added boundary. 173405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = nextIdx; 173505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = position; 173605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 173705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Retaining the original cache position. 173805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Check if the added boundary wraps around the buffer, and would over-write the original position. 173905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // It's the responsibility of callers of this function to not add too many. 174005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(nextIdx != fBufIdx); 174105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 174205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 174305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 174405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 174505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 174605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /* 174705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Add the boundary preceding the current position. 174805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The current position can be left as it was, or changed to the newly added boundary, 174905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * as specified by the update parameter. 
175005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 175105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean addPreceding(int position, int ruleStatusIdx, boolean update) { 175205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(position < fBoundaries[fStartBufIdx]); 175305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(ruleStatusIdx <= Short.MAX_VALUE); 175405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int nextIdx = modChunkSize(fStartBufIdx - 1); 175505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (nextIdx == fEndBufIdx) { 175605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBufIdx == fEndBufIdx && update == RetainCachePosition) { 175705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Failure. The insertion of the new boundary would claim the buffer position that is the 175805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // current iteration position. And we also want to retain the current iteration position. 175905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // (The buffer is already completely full of entries that precede the iteration position.) 
176005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 176105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 176205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fEndBufIdx = modChunkSize(fEndBufIdx - 1); 176305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 176405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundaries[nextIdx] = position; 176505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatuses[nextIdx] = (short)ruleStatusIdx; 176605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStartBufIdx = nextIdx; 176705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (update == UpdateCachePosition) { 176805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = nextIdx; 176905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = position; 177005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 177105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 177205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 177305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 177405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 177505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Set the cache position to the specified position, or, if the position 177605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * falls between to cached boundaries, to the preceding boundary. 177705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Fails if the requested position is outside of the range of boundaries currently held by the cache. 177805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * The startPosition must be on a code point boundary. 177905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 178005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Return true if successful, false if the specified position is after 178105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * the last cached boundary or before the first. 
178205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 178305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean seek(int pos) { 178405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) { 178505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; 178605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 178705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (pos == fBoundaries[fStartBufIdx]) { 178805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Common case: seek(0), from BreakIterator::first() 178905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = fStartBufIdx; 179005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fBoundaries[fBufIdx]; 179105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 179205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 179305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (pos == fBoundaries[fEndBufIdx]) { 179405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = fEndBufIdx; 179505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fBoundaries[fBufIdx]; 179605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 179705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 179805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 179905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int min = fStartBufIdx; 180005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int max = fEndBufIdx; 180105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert while (min != max) { 180205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int probe = (min + max + (min>max ? 
CACHE_SIZE : 0)) / 2; 180305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert probe = modChunkSize(probe); 180405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (fBoundaries[probe] > pos) { 180505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert max = probe; 180605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 180705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert min = modChunkSize(probe + 1); 180805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 180905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 181005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(fBoundaries[max] > pos); 181105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = modChunkSize(max - 1); 181205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = fBoundaries[fBufIdx]; 181305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert assert(fTextIdx <= pos); 181405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 181505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 181605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 181705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 181805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 181905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** 182005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * copy constructor, used from RuleBasedBreakIterator.clone(). 
182105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * 182205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * @param src 182305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert */ 182405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert BreakCache(BreakCache src) { 182505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStartBufIdx = src.fStartBufIdx; 182605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fEndBufIdx = src.fEndBufIdx; 182705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fTextIdx = src.fTextIdx; 182805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBufIdx = src.fBufIdx; 182905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fBoundaries = src.fBoundaries.clone(); 183005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fStatuses = src.fStatuses.clone(); 183105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fSideBuffer = new DictionaryBreakEngine.DequeI(); // Transient, no need to clone contents. 183205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 183305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 183405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert void dumpCache() { 183505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert System.out.printf("fTextIdx:%d fBufIdx:%d%n", fTextIdx, fBufIdx); 183605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert for (int i=fStartBufIdx; ; i=modChunkSize(i+1)) { 183705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert System.out.printf("%d %d%n", i, fBoundaries[i]); 183805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (i == fEndBufIdx) { 183905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 184005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 184105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 184205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert }; 184305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 184405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private final 
int modChunkSize(int index) { return index & (CACHE_SIZE - 1); }; 184505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 184605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert static final int CACHE_SIZE = 128; 184705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two."); 184805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 184905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fStartBufIdx; 185005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fEndBufIdx; // inclusive 185105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 185205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fTextIdx; 185305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int fBufIdx; 185405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 185505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int[] fBoundaries = new int[CACHE_SIZE]; 185605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert short[] fStatuses = new short[CACHE_SIZE]; 185705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 185805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert DictionaryBreakEngine.DequeI fSideBuffer = new DictionaryBreakEngine.DequeI(); 185905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert}; 186005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 186105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 186205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 186305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1866