/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass LaoBreakEngine extends DictionaryBreakEngine {
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Constants for LaoBreakIterator
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // How many words in a row are "good enough"?
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final byte LAO_LOOKAHEAD = 3;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Will not combine a non-word with a preceding dictionary word longer than this
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final byte LAO_ROOT_COMBINE_THRESHOLD = 3;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Will not combine a non-word that shares at least this much prefix with a
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // dictionary word with a preceding word
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final byte LAO_PREFIX_COMBINE_THRESHOLD = 3;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Minimum word size
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final byte LAO_MIN_WORD = 2;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private DictionaryMatcher fDictionary;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static UnicodeSet fLaoWordSet;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static UnicodeSet fEndWordSet;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static UnicodeSet fBeginWordSet;
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static UnicodeSet fMarkSet;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static {
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Initialize UnicodeSets
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLaoWordSet = new UnicodeSet();
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fMarkSet = new UnicodeSet();
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBeginWordSet = new UnicodeSet();
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLaoWordSet.applyPattern("[[:Laoo:]&[:LineBreak=SA:]]");
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLaoWordSet.compact();
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fMarkSet.applyPattern("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]");
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fMarkSet.add(0x0020);
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fEndWordSet = new UnicodeSet(fLaoWordSet);
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Compact for caching
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fMarkSet.compact();
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fEndWordSet.compact();
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBeginWordSet.compact();
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Freeze the static UnicodeSet
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fLaoWordSet.freeze();
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fMarkSet.freeze();
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fEndWordSet.freeze();
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fBeginWordSet.freeze();
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public LaoBreakEngine() throws IOException {
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setCharacters(fLaoWordSet);
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Initialize dictionary
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fDictionary = DictionaryData.loadDictionaryFor("Laoo");
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object obj) {
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Normally is a singleton, but it's possible to have duplicates
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //   during initialization. All are equivalent.
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return obj instanceof LaoBreakEngine;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode() {
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getClass().hashCode();
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean handles(int c, int breakType) {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (script == UScript.LAO);
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false;
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            DequeI foundBreaks) {
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if ((rangeEnd - rangeStart) < LAO_MIN_WORD) {
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;  // Not enough characters for word
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int wordsFound = 0;
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int wordLength;
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int current;
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        PossibleWord words[] = new PossibleWord[LAO_LOOKAHEAD];
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < LAO_LOOKAHEAD; i++) {
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            words[i] = new PossibleWord();
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int uc;
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fIter.setIndex(rangeStart);
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while ((current = fIter.getIndex()) < rangeEnd) {
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            wordLength = 0;
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //Look for candidate words at the current position
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If we found exactly one, use that
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (candidates == 1) {
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                wordLength = words[wordsFound%LAO_LOOKAHEAD].acceptMarked(fIter);
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                wordsFound += 1;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If there was more than one, see which one can take us forward the most words
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else if (candidates > 1) {
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean foundBest = false;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // If we're already at the end of the range, we're done
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (fIter.getIndex() < rangeEnd) {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    do {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        int wordsMatched = 1;
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (words[(wordsFound+1)%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if (wordsMatched < 2) {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // Followed by another dictionary word; mark first word as a good candidate
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                words[wordsFound%LAO_LOOKAHEAD].markCurrent();
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                wordsMatched = 2;
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // If we're already at the end of the range, we're done
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if (fIter.getIndex() >= rangeEnd) {
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                break;
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // See if any of the possible second words is followed by a third word
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            do {
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // If we find a third word, stop right away
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if (words[(wordsFound+2)%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    words[wordsFound%LAO_LOOKAHEAD].markCurrent();
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    foundBest = true;
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    break;
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            } while (words[(wordsFound+1)%LAO_LOOKAHEAD].backUp(fIter));
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } while (words[wordsFound%LAO_LOOKAHEAD].backUp(fIter) && !foundBest);
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                wordLength = words[wordsFound%LAO_LOOKAHEAD].acceptMarked(fIter);
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                wordsFound += 1;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We come here after having either found a word or not. We look ahead to the
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // next word. If it's not a dictionary word, we will combine it with the word we
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // just found (if there is one), but only if the preceding word does not exceed
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the threshold.
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The text iterator should now be positioned at the end of the word we found.
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (fIter.getIndex() < rangeEnd && wordLength < LAO_ROOT_COMBINE_THRESHOLD) {
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // If it is a dictionary word, do nothing. If it isn't, then if there is
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // no preceding word, or the non-word shares less than the minimum threshold
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // of characters with a dictionary word, then scan to resynchronize
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (words[wordsFound%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        (wordLength == 0 ||
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) {
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Look for a plausible word boundary
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int remaining = rangeEnd - (current + wordLength);
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int pc = fIter.current();
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int chars = 0;
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for (;;) {
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        fIter.next();
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        uc = fIter.current();
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        chars += 1;
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (--remaining <= 0) {
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // Maybe. See if it's in the dictionary.
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            int candidate = words[(wordsFound + 1) %LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            fIter.setIndex(current + wordLength + chars);
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if (candidate > 0) {
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                break;
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        pc = uc;
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Bump the word count if there wasn't already one
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (wordLength <= 0) {
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        wordsFound += 1;
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Update the length with the passed-over characters
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    wordLength += chars;
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Backup to where we were for next iteration
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    fIter.setIndex(current+wordLength);
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Never stop before a combining mark.
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int currPos;
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while ((currPos = fIter.getIndex()) < rangeEnd && fMarkSet.contains(fIter.current())) {
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fIter.next();
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                wordLength += fIter.getIndex() - currPos;
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look ahead for possible suffixes if a dictionary word does not follow.
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We do this in code rather than using a rule so that the heuristic
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // resynch continues to function. For example, one of the suffix characters
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // could be a typo in the middle of a word.
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // NOT CURRENTLY APPLICABLE TO LAO
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Did we find a word on this iteration? If so, push it on the break stack
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (wordLength > 0) {
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                foundBreaks.push(Integer.valueOf(current + wordLength));
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Don't return a break for the end of the dictionary range if there is one there
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (foundBreaks.peek() >= rangeEnd) {
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            foundBreaks.pop();
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            wordsFound -= 1;
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return wordsFound;
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
229