/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package android.icu.text;

import java.io.IOException;
import java.text.CharacterIterator;

import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
import android.icu.lang.UScript;

/**
 * Dictionary-based break engine for Thai text.
 *
 * <p>The engine registers itself for the word and line break kinds and splits a run of
 * Thai characters into words by looking up candidates in the "Thai" dictionary. Where the
 * dictionary does not match, it scans forward heuristically for a plausible boundary.
 *
 * <p>All character sets used by the engine are built once in the static initializer and
 * frozen before any instance can read them.
 */
class ThaiBreakEngine extends DictionaryBreakEngine {

    // Constants for ThaiBreakIterator
    // How many words in a row are "good enough"?
    private static final byte THAI_LOOKAHEAD = 3;
    // Will not combine a non-word with a preceding dictionary word longer than this
    private static final byte THAI_ROOT_COMBINE_THRESHOLD = 3;
    // Will not combine a non-word that shares at least this much prefix with a
    // dictionary word with a preceding word
    private static final byte THAI_PREFIX_COMBINE_THRESHOLD = 3;
    // Elision character
    private static final char THAI_PAIYANNOI = 0x0E2F;
    // Repeat character
    private static final char THAI_MAIYAMOK = 0x0E46;
    // Minimum word size
    private static final byte THAI_MIN_WORD = 2;
    // Minimum number of characters for two words
    private static final byte THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;

    // Dictionary used to look up candidate words; loaded once in the constructor.
    private final DictionaryMatcher fDictionary;
    // Thai-script characters with LineBreak=SA; passed to setCharacters() by the constructor.
    private static final UnicodeSet fThaiWordSet;
    // fThaiWordSet minus the characters removed below (U+0E31 and U+0E40..U+0E44).
    private static final UnicodeSet fEndWordSet;
    // Characters accepted as the first character of a word during the resync scan.
    private static final UnicodeSet fBeginWordSet;
    // The two suffix characters, PAIYANNOI and MAIYAMOK.
    private static final UnicodeSet fSuffixSet;
    // Thai marks (plus U+0020) that a break must never be placed in front of.
    private static final UnicodeSet fMarkSet;

    static {
        // Initialize UnicodeSets
        fThaiWordSet = new UnicodeSet();
        fMarkSet = new UnicodeSet();
        fBeginWordSet = new UnicodeSet();
        fSuffixSet = new UnicodeSet();

        fThaiWordSet.applyPattern("[[:Thai:]&[:LineBreak=SA:]]");
        fThaiWordSet.compact();

        fMarkSet.applyPattern("[[:Thai:]&[:LineBreak=SA:]&[:M:]]");
        fMarkSet.add(0x0020);
        fEndWordSet = new UnicodeSet(fThaiWordSet);
        fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
        fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
        fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
        fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
        fSuffixSet.add(THAI_PAIYANNOI);
        fSuffixSet.add(THAI_MAIYAMOK);

        // Compact for caching
        fMarkSet.compact();
        fEndWordSet.compact();
        fBeginWordSet.compact();
        fSuffixSet.compact();

        // Freeze the static UnicodeSet
        fThaiWordSet.freeze();
        fMarkSet.freeze();
        fEndWordSet.freeze();
        fBeginWordSet.freeze();
        fSuffixSet.freeze();
    }

    /**
     * Creates the engine for the word and line break kinds and loads the Thai dictionary.
     *
     * @throws IOException propagated from {@code DictionaryData.loadDictionaryFor("Thai")}
     */
    public ThaiBreakEngine() throws IOException {
        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
        setCharacters(fThaiWordSet);
        // Initialize dictionary
        fDictionary = DictionaryData.loadDictionaryFor("Thai");
    }

    /**
     * Treats every {@code ThaiBreakEngine} as equal to every other one.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code ThaiBreakEngine}
     */
    @Override
    public boolean equals(Object obj) {
        // Normally is a singleton, but it's possible to have duplicates
        //   during initialization. All are equivalent.
        return obj instanceof ThaiBreakEngine;
    }

    /**
     * Returns the hash code of this object's class, so that instances which compare equal
     * also hash equal.
     */
    @Override
    public int hashCode() {
        return getClass().hashCode();
    }

    /**
     * Reports whether this engine handles the given code point for the given break kind.
     *
     * @param c the code point to test
     * @param breakType one of the {@code BreakIterator.KIND_*} values
     * @return {@code true} if {@code breakType} is the word or line kind and the script
     *         property of {@code c} is Thai; {@code false} otherwise
     */
    public boolean handles(int c, int breakType) {
        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
            return (script == UScript.THAI);
        }
        return false;
    }

    /**
     * Divides the range {@code [rangeStart, rangeEnd)} of {@code fIter} into words and pushes
     * the end position of each word onto {@code foundBreaks}.
     *
     * <p>For each position the method asks the dictionary for candidate words. With several
     * candidates it looks ahead up to {@code THAI_LOOKAHEAD} words to choose one. If no
     * dictionary word follows a short word, it scans forward for a plausible boundary.
     * It then absorbs trailing characters from the mark set and a trailing PAIYANNOI or
     * MAIYAMOK suffix. A break that lands at or beyond {@code rangeEnd} is removed again
     * before returning.
     *
     * @param fIter iterator over the text; its index is moved by this method
     * @param rangeStart index of the first character of the range
     * @param rangeEnd index just past the last character of the range
     * @param foundBreaks receives the break positions that were found
     * @return the number of words found, or 0 when the range is shorter than
     *         {@code THAI_MIN_WORD_SPAN} characters
     */
    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
            DequeI foundBreaks) {

        if ((rangeEnd - rangeStart) < THAI_MIN_WORD_SPAN) {
            return 0;  // Not enough characters for word
        }
        int wordsFound = 0;
        int wordLength;
        // Ring of lookahead slots, indexed modulo THAI_LOOKAHEAD by word count.
        PossibleWord words[] = new PossibleWord[THAI_LOOKAHEAD];
        for (int i = 0; i < THAI_LOOKAHEAD; i++) {
            words[i] = new PossibleWord();
        }

        int uc;
        fIter.setIndex(rangeStart);
        int current;
        while ((current = fIter.getIndex()) < rangeEnd) {
            wordLength = 0;

            //Look for candidate words at the current position
            int candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);

            // If we found exactly one, use that
            if (candidates == 1) {
                wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(fIter);
                wordsFound += 1;
            }

            // If there was more than one, see which one can take us forward the most words
            else if (candidates > 1) {
                // If we're already at the end of the range, we're done
                if (fIter.getIndex() < rangeEnd) {
                    foundBest:
                    do {
                        // NOTE(review): wordsMatched is re-initialized to 1 on every pass, so
                        // the "wordsMatched < 2" guard below is always true here. Left as-is
                        // to preserve behavior; confirm the intent against upstream.
                        int wordsMatched = 1;
                        if (words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
                            if (wordsMatched < 2) {
                                // Followed by another dictionary word; mark first word as a good candidate
                                words[wordsFound%THAI_LOOKAHEAD].markCurrent();
                                wordsMatched = 2;
                            }

                            // If we're already at the end of the range, we're done
                            if (fIter.getIndex() >= rangeEnd) {
                                break foundBest;
                            }

                            // See if any of the possible second words is followed by a third word
                            do {
                                // If we find a third word, stop right away
                                if (words[(wordsFound+2)%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
                                    words[wordsFound%THAI_LOOKAHEAD].markCurrent();
                                    break foundBest;
                                }
                            } while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(fIter));
                        }
                    }
                    while (words[wordsFound%THAI_LOOKAHEAD].backUp(fIter));
                    // foundBest: end of loop
                }
                wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(fIter);
                wordsFound += 1;
            }

            // We come here after having either found a word or not. We look ahead to the
            // next word. If it's not a dictionary word, we will combine it with the word we
            // just found (if there is one), but only if the preceding word does not exceed
            // the threshold.
            // The text iterator should now be positioned at the end of the word we found.
            if (fIter.getIndex() < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
                // If it is a dictionary word, do nothing. If it isn't, then if there is
                // no preceding word, or the non-word shares less than the minimum threshold
                // of characters with a dictionary word, then scan to resynchronize
                if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
                        (wordLength == 0 ||
                                words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
                    // Look for a plausible word boundary
                    int remaining = rangeEnd - (current + wordLength);
                    int pc = fIter.current();
                    int chars = 0;
                    for (;;) {
                        fIter.next();
                        uc = fIter.current();
                        chars += 1;
                        if (--remaining <= 0) {
                            break;
                        }
                        if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
                            // Maybe. See if it's in the dictionary.
                            // Note: In the original Apple code, checked that the next
                            // two characters after uc were not 0x0E4C THANTHAKHAT before
                            // checking the dictionary. That is just a performance filter,
                            // but it's not clear it's faster than checking the trie
                            int candidate = words[(wordsFound + 1) %THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
                            // The lookup may have moved the iterator; put it back on the
                            // character that was just examined.
                            fIter.setIndex(current + wordLength + chars);
                            if (candidate > 0) {
                                break;
                            }
                        }
                        pc = uc;
                    }

                    // Bump the word count if there wasn't already one
                    if (wordLength <= 0) {
                        wordsFound += 1;
                    }

                    // Update the length with the passed-over characters
                    wordLength += chars;
                } else {
                    // Backup to where we were for next iteration
                    fIter.setIndex(current+wordLength);
                }
            }

            // Never stop before a combining mark.
            int currPos;
            while ((currPos = fIter.getIndex()) < rangeEnd && fMarkSet.contains(fIter.current())) {
                fIter.next();
                wordLength += fIter.getIndex() - currPos;
            }

            // Look ahead for possible suffixes if a dictionary word does not follow.
            // We do this in code rather than using a rule so that the heuristic
            // resynch continues to function. For example, one of the suffix characters
            // could be a typo in the middle of a word.
            if (fIter.getIndex() < rangeEnd && wordLength > 0) {
                if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
                        fSuffixSet.contains(uc = fIter.current())) {
                    if (uc == THAI_PAIYANNOI) {
                        if (!fSuffixSet.contains(fIter.previous())) {
                            // Skip over previous end and PAIYANNOI
                            fIter.next();
                            fIter.next();
                            wordLength += 1;
                            // Re-read so a MAIYAMOK directly after the PAIYANNOI is seen below.
                            uc = fIter.current();
                        } else {
                            // Restore prior position
                            fIter.next();
                        }
                    }
                    if (uc == THAI_MAIYAMOK) {
                        if (fIter.previous() != THAI_MAIYAMOK) {
                            // Skip over previous end and MAIYAMOK
                            fIter.next();
                            fIter.next();
                            wordLength += 1;
                        } else {
                            // restore prior position
                            fIter.next();
                        }
                    }
                } else {
                    fIter.setIndex(current + wordLength);
                }
            }

            // Did we find a word on this iteration? If so, push it on the break stack
            if (wordLength > 0) {
                foundBreaks.push(Integer.valueOf(current + wordLength));
            }
        }

        // Don't return a break for the end of the dictionary range if there is one there
        // NOTE(review): peek() is called without checking that anything was pushed; this
        // presumably relies on DequeI.peek() being safe on an empty deque -- confirm.
        if (foundBreaks.peek() >= rangeEnd) {
            foundBreaks.pop();
            wordsFound -= 1;
        }

        return wordsFound;
    }

}