17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2014, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass LaoBreakEngine extends DictionaryBreakEngine { 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Constants for LaoBreakIterator 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // How many words in a row are "good enough"? 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final byte LAO_LOOKAHEAD = 3; 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Will not combine a non-word with a preceding dictionary word longer than this 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final byte LAO_ROOT_COMBINE_THRESHOLD = 3; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Will not combine a non-word that shares at least this much prefix with a 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // dictionary word with a preceding word 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final byte LAO_PREFIX_COMBINE_THRESHOLD = 3; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Minimum word size 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final byte LAO_MIN_WORD = 2; 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private DictionaryMatcher fDictionary; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static UnicodeSet fLaoWordSet; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static UnicodeSet fEndWordSet; 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static UnicodeSet fBeginWordSet; 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static UnicodeSet fMarkSet; 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static { 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Initialize UnicodeSets 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fLaoWordSet = new UnicodeSet(); 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fMarkSet = new UnicodeSet(); 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fBeginWordSet = new UnicodeSet(); 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fLaoWordSet.applyPattern("[[:Laoo:]&[:LineBreak=SA:]]"); 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fLaoWordSet.compact(); 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fMarkSet.applyPattern("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"); 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fMarkSet.add(0x0020); 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fEndWordSet = new UnicodeSet(fLaoWordSet); 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters) 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent) 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Compact for caching 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fMarkSet.compact(); 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fEndWordSet.compact(); 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fBeginWordSet.compact(); 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Freeze the static UnicodeSet 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fLaoWordSet.freeze(); 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fMarkSet.freeze(); 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fEndWordSet.freeze(); 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fBeginWordSet.freeze(); 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public LaoBreakEngine() throws IOException { 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE); 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setCharacters(fLaoWordSet); 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Initialize dictionary 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fDictionary = DictionaryData.loadDictionaryFor("Laoo"); 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean equals(Object obj) { 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Normally is a singleton, but it's possible to have duplicates 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // during initialization. All are equivalent. 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return obj instanceof LaoBreakEngine; 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int hashCode() { 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return getClass().hashCode(); 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean handles(int c, int breakType) { 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) { 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT); 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (script == UScript.LAO); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd, 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert DequeI foundBreaks) { 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((rangeEnd - rangeStart) < LAO_MIN_WORD) { 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; // Not enough characters for word 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int wordsFound = 0; 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int wordLength; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int current; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert PossibleWord words[] = new PossibleWord[LAO_LOOKAHEAD]; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < LAO_LOOKAHEAD; i++) { 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert words[i] = new PossibleWord(); 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int uc; 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fIter.setIndex(rangeStart); 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while ((current = fIter.getIndex()) < rangeEnd) { 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordLength = 0; 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //Look for candidate words at the current position 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd); 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If we found exactly one, use that 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (candidates == 1) { 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordLength = words[wordsFound%LAO_LOOKAHEAD].acceptMarked(fIter); 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordsFound += 1; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If there was more than one, see which one can take us forward the most words 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else if (candidates > 1) { 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean foundBest = false; 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If we're already at the end of the range, we're done 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (fIter.getIndex() < rangeEnd) { 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int wordsMatched = 1; 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (words[(wordsFound+1)%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) { 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (wordsMatched < 2) { 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Followed by another dictionary word; mark first word as a good candidate 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert words[wordsFound%LAO_LOOKAHEAD].markCurrent(); 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordsMatched = 2; 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If we're already at the end of the range, we're done 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (fIter.getIndex() >= rangeEnd) { 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // See if any of the possible second words is followed by a third word 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If we find a third word, stop right away 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (words[(wordsFound+2)%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) { 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert words[wordsFound%LAO_LOOKAHEAD].markCurrent(); 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert foundBest = true; 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (words[(wordsFound+1)%LAO_LOOKAHEAD].backUp(fIter)); 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (words[wordsFound%LAO_LOOKAHEAD].backUp(fIter) && !foundBest); 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordLength = words[wordsFound%LAO_LOOKAHEAD].acceptMarked(fIter); 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordsFound += 1; 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We come here after having either found a word or not. We look ahead to the 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // next word. If it's not a dictionary word, we will combine it with the word we 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // just found (if there is one), but only if the preceding word does not exceed 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the threshold. 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The text iterator should now be positioned at the end of the word we found. 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (fIter.getIndex() < rangeEnd && wordLength < LAO_ROOT_COMBINE_THRESHOLD) { 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If it is a dictionary word, do nothing. If it isn't, then if there is 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // no preceding word, or the non-word shares less than the minimum threshold 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // of characters with a dictionary word, then scan to resynchronize 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (words[wordsFound%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 && 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (wordLength == 0 || 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) { 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Look for a plausible word boundary 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int remaining = rangeEnd - (current + wordLength); 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pc = fIter.current(); 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int chars = 0; 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fIter.next(); 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uc = fIter.current(); 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert chars += 1; 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (--remaining <= 0) { 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Maybe. See if it's in the dictionary. 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int candidate = words[(wordsFound + 1) %LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd); 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fIter.setIndex(current + wordLength + chars); 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (candidate > 0) { 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pc = uc; 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Bump the word count if there wasn't already one 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (wordLength <= 0) { 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordsFound += 1; 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Update the length with the passed-over characters 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordLength += chars; 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Backup to where we were for next iteration 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fIter.setIndex(current+wordLength); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Never stop before a combining mark. 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int currPos; 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while ((currPos = fIter.getIndex()) < rangeEnd && fMarkSet.contains(fIter.current())) { 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fIter.next(); 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordLength += fIter.getIndex() - currPos; 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Look ahead for possible suffixes if a dictionary word does not follow. 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We do this in code rather than using a rule so that the heuristic 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // resynch continues to function. For example, one of the suffix characters 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // could be a typo in the middle of a word. 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NOT CURRENTLY APPLICABLE TO LAO 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Did we find a word on this iteration? If so, push it on the break stack 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (wordLength > 0) { 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert foundBreaks.push(Integer.valueOf(current + wordLength)); 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Don't return a break for the end of the dictionary range if there is one there 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (foundBreaks.peek() >= rangeEnd) { 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert foundBreaks.pop(); 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert wordsFound -= 1; 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return wordsFound; 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 229