/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package android.icu.text;

import java.io.IOException;
import java.text.CharacterIterator;

import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
import android.icu.lang.UScript;

192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerclass ThaiBreakEngine extends DictionaryBreakEngine {
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Constants for ThaiBreakIterator
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // How many words in a row are "good enough"?
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final byte THAI_LOOKAHEAD = 3;
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Will not combine a non-word with a preceding dictionary word longer than this
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final byte THAI_ROOT_COMBINE_THRESHOLD = 3;
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Will not combine a non-word that shares at least this much prefix with a
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // dictionary word with a preceding word
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final byte THAI_PREFIX_COMBINE_THRESHOLD = 3;
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Ellision character
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char THAI_PAIYANNOI = 0x0E2F;
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Repeat character
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char THAI_MAIYAMOK = 0x0E46;
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Minimum word size
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final byte THAI_MIN_WORD = 2;
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Minimum number of characters for two words
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final byte THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private DictionaryMatcher fDictionary;
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static UnicodeSet fThaiWordSet;
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static UnicodeSet fEndWordSet;
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static UnicodeSet fBeginWordSet;
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static UnicodeSet fSuffixSet;
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static UnicodeSet fMarkSet;
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static {
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Initialize UnicodeSets
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fThaiWordSet = new UnicodeSet();
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fMarkSet = new UnicodeSet();
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fBeginWordSet = new UnicodeSet();
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fSuffixSet = new UnicodeSet();
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fThaiWordSet.applyPattern("[[:Thai:]&[:LineBreak=SA:]]");
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fThaiWordSet.compact();
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fMarkSet.applyPattern("[[:Thai:]&[:LineBreak=SA:]&[:M:]]");
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fMarkSet.add(0x0020);
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fEndWordSet = new UnicodeSet(fThaiWordSet);
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fBeginWordSet.add(0x0E01, 0x0E2E); //KO KAI through HO NOKHUK
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fSuffixSet.add(THAI_PAIYANNOI);
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fSuffixSet.add(THAI_MAIYAMOK);
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Compact for caching
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fMarkSet.compact();
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fEndWordSet.compact();
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fBeginWordSet.compact();
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fSuffixSet.compact();
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Freeze the static UnicodeSet
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fThaiWordSet.freeze();
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fMarkSet.freeze();
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fEndWordSet.freeze();
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fBeginWordSet.freeze();
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fSuffixSet.freeze();
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public ThaiBreakEngine() throws IOException {
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        setCharacters(fThaiWordSet);
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Initialize dictionary
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fDictionary = DictionaryData.loadDictionaryFor("Thai");
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean equals(Object obj) {
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Normally is a singleton, but it's possible to have duplicates
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   during initialization. All are equivalent.
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return obj instanceof ThaiBreakEngine;
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int hashCode() {
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getClass().hashCode();
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean handles(int c, int breakType) {
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return (script == UScript.THAI);
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return false;
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            DequeI foundBreaks) {
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if ((rangeEnd - rangeStart) < THAI_MIN_WORD_SPAN) {
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;  // Not enough characters for word
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int wordsFound = 0;
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int wordLength;
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        PossibleWord words[] = new PossibleWord[THAI_LOOKAHEAD];
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < THAI_LOOKAHEAD; i++) {
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            words[i] = new PossibleWord();
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int uc;
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fIter.setIndex(rangeStart);
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int current;
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while ((current = fIter.getIndex()) < rangeEnd) {
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            wordLength = 0;
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //Look for candidate words at the current position
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If we found exactly one, use that
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (candidates == 1) {
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(fIter);
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                wordsFound += 1;
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If there was more than one, see which one can take us forward the most words
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (candidates > 1) {
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If we're already at the end of the range, we're done
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (fIter.getIndex() < rangeEnd) {
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                  foundBest:
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    do {
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int wordsMatched = 1;
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if (wordsMatched < 2) {
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // Followed by another dictionary word; mark first word as a good candidate
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                words[wordsFound%THAI_LOOKAHEAD].markCurrent();
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                wordsMatched = 2;
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // If we're already at the end of the range, we're done
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if (fIter.getIndex() >= rangeEnd) {
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                break foundBest;
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // See if any of the possible second words is followed by a third word
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            do {
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // If we find a third word, stop right away
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if (words[(wordsFound+2)%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    words[wordsFound%THAI_LOOKAHEAD].markCurrent();
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    break foundBest;
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            } while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(fIter));
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    while (words[wordsFound%THAI_LOOKAHEAD].backUp(fIter));
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // foundBest: end of loop
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(fIter);
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                wordsFound += 1;
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We come here after having either found a word or not. We look ahead to the
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // next word. If it's not a dictionary word, we will combine it with the word we
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // just found (if there is one), but only if the preceding word does not exceed
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the threshold.
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The text iterator should now be positioned at the end of the word we found.
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fIter.getIndex() < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If it is a dictionary word, do nothing. If it isn't, then if there is
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // no preceding word, or the non-word shares less than the minimum threshold
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // of characters with a dictionary word, then scan to resynchronize
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        (wordLength == 0 ||
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Look for a plausible word boundary
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int remaining = rangeEnd - (current + wordLength);
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int pc = fIter.current();
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int chars = 0;
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (;;) {
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        fIter.next();
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        uc = fIter.current();
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        chars += 1;
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (--remaining <= 0) {
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Maybe. See if it's in the dictionary.
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Note: In the original Apple code, checked that the next
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // two characters after uc were not 0x0E4C THANTHAKHAT before
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // checking the dictionary. That is just a performance filter,
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // but it's not clear it's faster than checking the trie
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            int candidate = words[(wordsFound + 1) %THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.setIndex(current + wordLength + chars);
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if (candidate > 0) {
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                break;
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        pc = uc;
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Bump the word count if there wasn't already one
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (wordLength <= 0) {
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        wordsFound += 1;
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Update the length with the passed-over characters
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    wordLength += chars;
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Backup to where we were for next iteration
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    fIter.setIndex(current+wordLength);
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Never stop before a combining mark.
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int currPos;
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while ((currPos = fIter.getIndex()) < rangeEnd && fMarkSet.contains(fIter.current())) {
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fIter.next();
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                wordLength += fIter.getIndex() - currPos;
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Look ahead for possible suffixes if a dictionary word does not follow.
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We do this in code rather than using a rule so that the heuristic
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // resynch continues to function. For example, one of the suffix characters
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // could be a typo in the middle of a word.
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fIter.getIndex() < rangeEnd && wordLength > 0) {
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        fSuffixSet.contains(uc = fIter.current())) {
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (uc == THAI_PAIYANNOI) {
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (!fSuffixSet.contains(fIter.previous())) {
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Skip over previous end and PAIYANNOI
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.next();
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.next();
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            wordLength += 1;
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            uc = fIter.current();
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Restore prior position
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.next();
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (uc == THAI_MAIYAMOK) {
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if (fIter.previous() != THAI_MAIYAMOK) {
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Skip over previous end and MAIYAMOK
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.next();
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.next();
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            wordLength += 1;
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // restore prior position
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            fIter.next();
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    fIter.setIndex(current + wordLength);
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Did we find a word on this iteration? If so, push it on the break stack
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (wordLength > 0) {
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                foundBreaks.push(Integer.valueOf(current + wordLength));
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Don't return a break for the end of the dictionary range if there is one there
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (foundBreaks.peek() >= rangeEnd) {
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            foundBreaks.pop();
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            wordsFound -= 1;
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return wordsFound;
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
277