/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
10package android.icu.text;
11
12import java.io.IOException;
13import java.text.CharacterIterator;
14
15import android.icu.lang.UCharacter;
16import android.icu.lang.UProperty;
17import android.icu.lang.UScript;
18
19class LaoBreakEngine extends DictionaryBreakEngine {
20
21    // Constants for LaoBreakIterator
22    // How many words in a row are "good enough"?
23    private static final byte LAO_LOOKAHEAD = 3;
24    // Will not combine a non-word with a preceding dictionary word longer than this
25    private static final byte LAO_ROOT_COMBINE_THRESHOLD = 3;
26    // Will not combine a non-word that shares at least this much prefix with a
27    // dictionary word with a preceding word
28    private static final byte LAO_PREFIX_COMBINE_THRESHOLD = 3;
29    // Minimum word size
30    private static final byte LAO_MIN_WORD = 2;
31
32    private DictionaryMatcher fDictionary;
33    private static UnicodeSet fLaoWordSet;
34    private static UnicodeSet fEndWordSet;
35    private static UnicodeSet fBeginWordSet;
36    private static UnicodeSet fMarkSet;
37
38    static {
39        // Initialize UnicodeSets
40        fLaoWordSet = new UnicodeSet();
41        fMarkSet = new UnicodeSet();
42        fBeginWordSet = new UnicodeSet();
43
44        fLaoWordSet.applyPattern("[[:Laoo:]&[:LineBreak=SA:]]");
45        fLaoWordSet.compact();
46
47        fMarkSet.applyPattern("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]");
48        fMarkSet.add(0x0020);
49        fEndWordSet = new UnicodeSet(fLaoWordSet);
50        fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
51        fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
52        fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
53        fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels
54
55        // Compact for caching
56        fMarkSet.compact();
57        fEndWordSet.compact();
58        fBeginWordSet.compact();
59
60        // Freeze the static UnicodeSet
61        fLaoWordSet.freeze();
62        fMarkSet.freeze();
63        fEndWordSet.freeze();
64        fBeginWordSet.freeze();
65    }
66
67    public LaoBreakEngine() throws IOException {
68        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
69        setCharacters(fLaoWordSet);
70        // Initialize dictionary
71        fDictionary = DictionaryData.loadDictionaryFor("Laoo");
72    }
73
74    public boolean equals(Object obj) {
75        // Normally is a singleton, but it's possible to have duplicates
76        //   during initialization. All are equivalent.
77        return obj instanceof LaoBreakEngine;
78    }
79
80    public int hashCode() {
81        return getClass().hashCode();
82    }
83
84    public boolean handles(int c, int breakType) {
85        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
86            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
87            return (script == UScript.LAO);
88        }
89        return false;
90    }
91
92    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
93            DequeI foundBreaks) {
94
95
96        if ((rangeEnd - rangeStart) < LAO_MIN_WORD) {
97            return 0;  // Not enough characters for word
98        }
99        int wordsFound = 0;
100        int wordLength;
101        int current;
102        PossibleWord words[] = new PossibleWord[LAO_LOOKAHEAD];
103        for (int i = 0; i < LAO_LOOKAHEAD; i++) {
104            words[i] = new PossibleWord();
105        }
106        int uc;
107
108        fIter.setIndex(rangeStart);
109        while ((current = fIter.getIndex()) < rangeEnd) {
110            wordLength = 0;
111
112            //Look for candidate words at the current position
113            int candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
114
115            // If we found exactly one, use that
116            if (candidates == 1) {
117                wordLength = words[wordsFound%LAO_LOOKAHEAD].acceptMarked(fIter);
118                wordsFound += 1;
119            }
120
121            // If there was more than one, see which one can take us forward the most words
122            else if (candidates > 1) {
123                boolean foundBest = false;
124                // If we're already at the end of the range, we're done
125                if (fIter.getIndex() < rangeEnd) {
126                    do {
127                        int wordsMatched = 1;
128                        if (words[(wordsFound+1)%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
129                            if (wordsMatched < 2) {
130                                // Followed by another dictionary word; mark first word as a good candidate
131                                words[wordsFound%LAO_LOOKAHEAD].markCurrent();
132                                wordsMatched = 2;
133                            }
134
135                            // If we're already at the end of the range, we're done
136                            if (fIter.getIndex() >= rangeEnd) {
137                                break;
138                            }
139
140                            // See if any of the possible second words is followed by a third word
141                            do {
142                                // If we find a third word, stop right away
143                                if (words[(wordsFound+2)%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) > 0) {
144                                    words[wordsFound%LAO_LOOKAHEAD].markCurrent();
145                                    foundBest = true;
146                                    break;
147                                }
148                            } while (words[(wordsFound+1)%LAO_LOOKAHEAD].backUp(fIter));
149                        }
150                    } while (words[wordsFound%LAO_LOOKAHEAD].backUp(fIter) && !foundBest);
151                }
152                wordLength = words[wordsFound%LAO_LOOKAHEAD].acceptMarked(fIter);
153                wordsFound += 1;
154            }
155
156            // We come here after having either found a word or not. We look ahead to the
157            // next word. If it's not a dictionary word, we will combine it with the word we
158            // just found (if there is one), but only if the preceding word does not exceed
159            // the threshold.
160            // The text iterator should now be positioned at the end of the word we found.
161            if (fIter.getIndex() < rangeEnd && wordLength < LAO_ROOT_COMBINE_THRESHOLD) {
162                // If it is a dictionary word, do nothing. If it isn't, then if there is
163                // no preceding word, or the non-word shares less than the minimum threshold
164                // of characters with a dictionary word, then scan to resynchronize
165                if (words[wordsFound%LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
166                        (wordLength == 0 ||
167                                words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) {
168                    // Look for a plausible word boundary
169                    int remaining = rangeEnd - (current + wordLength);
170                    int pc = fIter.current();
171                    int chars = 0;
172                    for (;;) {
173                        fIter.next();
174                        uc = fIter.current();
175                        chars += 1;
176                        if (--remaining <= 0) {
177                            break;
178                        }
179                        if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
180                            // Maybe. See if it's in the dictionary.
181                            int candidate = words[(wordsFound + 1) %LAO_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd);
182                            fIter.setIndex(current + wordLength + chars);
183                            if (candidate > 0) {
184                                break;
185                            }
186                        }
187                        pc = uc;
188                    }
189
190                    // Bump the word count if there wasn't already one
191                    if (wordLength <= 0) {
192                        wordsFound += 1;
193                    }
194
195                    // Update the length with the passed-over characters
196                    wordLength += chars;
197                } else {
198                    // Backup to where we were for next iteration
199                    fIter.setIndex(current+wordLength);
200                }
201            }
202
203            // Never stop before a combining mark.
204            int currPos;
205            while ((currPos = fIter.getIndex()) < rangeEnd && fMarkSet.contains(fIter.current())) {
206                fIter.next();
207                wordLength += fIter.getIndex() - currPos;
208            }
209
210            // Look ahead for possible suffixes if a dictionary word does not follow.
211            // We do this in code rather than using a rule so that the heuristic
212            // resynch continues to function. For example, one of the suffix characters
213            // could be a typo in the middle of a word.
214            // NOT CURRENTLY APPLICABLE TO LAO
215
216            // Did we find a word on this iteration? If so, push it on the break stack
217            if (wordLength > 0) {
218                foundBreaks.push(Integer.valueOf(current + wordLength));
219            }
220        }
221
222        // Don't return a break for the end of the dictionary range if there is one there
223        if (foundBreaks.peek() >= rangeEnd) {
224            foundBreaks.pop();
225            wordsFound -= 1;
226        }
227
228        return wordsFound;
229    }
230
231}
232