patricia_trie_policy.cpp revision 2d57b3339ad5b4bbf0939858c36c7daf5e38a4cb
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
19
20#include "defines.h"
21#include "suggest/core/dicnode/dic_node.h"
22#include "suggest/core/dicnode/dic_node_vector.h"
23#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
24#include "suggest/core/dictionary/ngram_listener.h"
25#include "suggest/core/session/prev_words_info.h"
26#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
27#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
28#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
29#include "utils/char_utils.h"
30
31namespace latinime {
32
33void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
34        DicNodeVector *const childDicNodes) const {
35    if (!dicNode->hasChildren()) {
36        return;
37    }
38    int nextPos = dicNode->getChildrenPtNodeArrayPos();
39    if (nextPos < 0 || nextPos >= mDictBufferSize) {
40        AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
41                nextPos, mDictBufferSize);
42        mIsCorrupted = true;
43        ASSERT(false);
44        return;
45    }
46    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
47            mDictRoot, &nextPos);
48    for (int i = 0; i < childCount; i++) {
49        if (nextPos < 0 || nextPos >= mDictBufferSize) {
50            AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
51                    nextPos, mDictBufferSize, i, childCount);
52            mIsCorrupted = true;
53            ASSERT(false);
54            return;
55        }
56        nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
57    }
58}
59
60// This retrieves code points and the probability of the word by its terminal position.
61// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
62// it is possible to check for this with advantageous complexity. For each PtNode array, we search
63// for PtNodes with children and compare the children position with the position we look for.
64// When we shoot the position we look for, it means the word we look for is in the children
65// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
66// PtNode array with the last PtNode's children position still less than what we are searching for,
67// we must descend the last PtNode's children (for example, if the word we are searching for starts
68// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
69// than the position we look for, and we have to descend the z PtNode).
70/* Parameters :
71 * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
72 *   what is stored as the "bigram position" in each bigram)
73 * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
74 * outUnigramProbability: a pointer to an int to write the probability into.
75 * Return value : the code point count, of 0 if the word was not found.
76 */
77// TODO: Split this function to be more readable
78int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
79        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
80        int *const outUnigramProbability) const {
81    int pos = getRootPosition();
82    int wordPos = 0;
83    // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
84    // only traverse PtNodes that are actually a part of the terminal we are searching, so each
85    // time we enter this loop we are one depth level further than last time.
86    // The only reason we count PtNodes is because we want to reduce the probability of infinite
87    // looping in case there is a bug. Since we know there is an upper bound to the depth we are
88    // supposed to traverse, it does not hurt to count iterations.
89    for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
90        int lastCandidatePtNodePos = 0;
91        // Let's loop through PtNodes in this PtNode array searching for either the terminal
92        // or one of its ascendants.
93        if (pos < 0 || pos >= mDictBufferSize) {
94            AKLOGE("PtNode array position is invalid. pos: %d, dict size: %d",
95                    pos, mDictBufferSize);
96            mIsCorrupted = true;
97            ASSERT(false);
98            *outUnigramProbability = NOT_A_PROBABILITY;
99            return 0;
100        }
101        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
102                mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
103            const int startPos = pos;
104            if (pos < 0 || pos >= mDictBufferSize) {
105                AKLOGE("PtNode position is invalid. pos: %d, dict size: %d", pos, mDictBufferSize);
106                mIsCorrupted = true;
107                ASSERT(false);
108                *outUnigramProbability = NOT_A_PROBABILITY;
109                return 0;
110            }
111            const PatriciaTrieReadingUtils::NodeFlags flags =
112                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
113            const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
114                    mDictRoot, &pos);
115            if (ptNodePos == startPos) {
116                // We found the position. Copy the rest of the code points in the buffer and return
117                // the length.
118                outCodePoints[wordPos] = character;
119                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
120                    int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
121                            mDictRoot, &pos);
122                    // We count code points in order to avoid infinite loops if the file is broken
123                    // or if there is some other bug
124                    int charCount = maxCodePointCount;
125                    while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
126                        outCodePoints[++wordPos] = nextChar;
127                        nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
128                                mDictRoot, &pos);
129                    }
130                }
131                *outUnigramProbability =
132                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
133                                &pos);
134                return ++wordPos;
135            }
136            // We need to skip past this PtNode, so skip any remaining code points after the
137            // first and possibly the probability.
138            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
139                PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
140            }
141            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
142                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
143            }
144            // The fact that this PtNode has children is very important. Since we already know
145            // that this PtNode does not match, if it has no children we know it is irrelevant
146            // to what we are searching for.
147            const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
148            // We will write in `found' whether we have passed the children position we are
149            // searching for. For example if we search for "beer", the children of b are less
150            // than the address we are searching for and the children of c are greater. When we
151            // come here for c, we realize this is too big, and that we should descend b.
152            bool found;
153            if (hasChildren) {
154                int currentPos = pos;
155                // Here comes the tricky part. First, read the children position.
156                const int childrenPos = PatriciaTrieReadingUtils
157                        ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
158                if (childrenPos > ptNodePos) {
159                    // If the children pos is greater than the position, it means the previous
160                    // PtNode, which position is stored in lastCandidatePtNodePos, was the right
161                    // one.
162                    found = true;
163                } else if (1 >= ptNodeCount) {
164                    // However if we are on the LAST PtNode of this array, and we have NOT shot the
165                    // position we should descend THIS PtNode. So we trick the
166                    // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
167                    // one.
168                    lastCandidatePtNodePos = startPos;
169                    found = true;
170                } else {
171                    // Else, we should continue looking.
172                    found = false;
173                }
174            } else {
175                // Even if we don't have children here, we could still be on the last PtNode of
176                // this array. If this is the case, we should descend the last PtNode that had
177                // children, and their position is already in lastCandidatePtNodePos.
178                found = (1 >= ptNodeCount);
179            }
180
181            if (found) {
182                // Okay, we found the PtNode we should descend. Its position is in
183                // the lastCandidatePtNodePos variable, so we just re-read it.
184                if (0 != lastCandidatePtNodePos) {
185                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
186                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
187                                    mDictRoot, &lastCandidatePtNodePos);
188                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
189                            mDictRoot, &lastCandidatePtNodePos);
190                    // We copy all the characters in this PtNode to the buffer
191                    outCodePoints[wordPos] = lastChar;
192                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
193                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
194                                mDictRoot, &lastCandidatePtNodePos);
195                        int charCount = maxCodePointCount;
196                        while (-1 != nextChar && --charCount > 0) {
197                            outCodePoints[++wordPos] = nextChar;
198                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
199                                    mDictRoot, &lastCandidatePtNodePos);
200                        }
201                    }
202                    ++wordPos;
203                    // Now we only need to branch to the children address. Skip the probability if
204                    // it's there, read pos, and break to resume the search at pos.
205                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
206                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
207                                &lastCandidatePtNodePos);
208                    }
209                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
210                            mDictRoot, lastFlags, &lastCandidatePtNodePos);
211                    break;
212                } else {
213                    // Here is a little tricky part: we come here if we found out that all children
214                    // addresses in this PtNode are bigger than the address we are searching for.
215                    // Should we conclude the word is not in the dictionary? No! It could still be
216                    // one of the remaining PtNodes in this array, so we have to keep looking in
217                    // this array until we find it (or we realize it's not there either, in which
218                    // case it's actually not in the dictionary). Pass the end of this PtNode,
219                    // ready to start the next one.
220                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
221                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
222                                mDictRoot, flags, &pos);
223                    }
224                    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
225                        mShortcutListPolicy.skipAllShortcuts(&pos);
226                    }
227                    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
228                        if (!mBigramListPolicy.skipAllBigrams(&pos)) {
229                            AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
230                                    pos);
231                            mIsCorrupted = true;
232                            ASSERT(false);
233                            *outUnigramProbability = NOT_A_PROBABILITY;
234                            return 0;
235                        }
236                    }
237                }
238            } else {
239                // If we did not find it, we should record the last children address for the next
240                // iteration.
241                if (hasChildren) lastCandidatePtNodePos = startPos;
242                // Now skip the end of this PtNode (children pos and the attributes if any) so that
243                // our pos is after the end of this PtNode, at the start of the next one.
244                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
245                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
246                            mDictRoot, flags, &pos);
247                }
248                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
249                    mShortcutListPolicy.skipAllShortcuts(&pos);
250                }
251                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
252                    if (!mBigramListPolicy.skipAllBigrams(&pos)) {
253                        AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
254                        mIsCorrupted = true;
255                        ASSERT(false);
256                        *outUnigramProbability = NOT_A_PROBABILITY;
257                        return 0;
258                    }
259                }
260            }
261
262        }
263    }
264    // If we have looked through all the PtNodes and found no match, the ptNodePos is
265    // not the position of a terminal in this dictionary.
266    return 0;
267}
268
269// This function gets the position of the terminal PtNode of the exact matching word in the
270// dictionary. If no match is found, it returns NOT_A_DICT_POS.
271int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
272        const int length, const bool forceLowerCaseSearch) const {
273    DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
274    readingHelper.initWithPtNodeArrayPos(getRootPosition());
275    const int ptNodePos =
276            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
277    if (readingHelper.isError()) {
278        mIsCorrupted = true;
279        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
280    }
281    return ptNodePos;
282}
283
284int PatriciaTriePolicy::getProbability(const int unigramProbability,
285        const int bigramProbability) const {
286    // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
287    // probability, the worse the precision. The theoritical maximum error in resulting probability
288    // is 8 - although in the practice it's never bigger than 3 or 4 in very bad cases. This means
289    // that sometimes, we'll see some bigrams interverted here, but it can't get too bad.
290    if (unigramProbability == NOT_A_PROBABILITY) {
291        return NOT_A_PROBABILITY;
292    } else if (bigramProbability == NOT_A_PROBABILITY) {
293        return ProbabilityUtils::backoff(unigramProbability);
294    } else {
295        return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
296                bigramProbability);
297    }
298}
299
300int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
301        const int ptNodePos) const {
302    if (ptNodePos == NOT_A_DICT_POS) {
303        return NOT_A_PROBABILITY;
304    }
305    const PtNodeParams ptNodeParams =
306            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
307    if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
308        // If this is not a word, or if it's a blacklisted entry, it should behave as
309        // having no probability outside of the suggestion process (where it should be used
310        // for shortcuts).
311        return NOT_A_PROBABILITY;
312    }
313    if (prevWordsInfo) {
314        BinaryDictionaryBigramsIterator bigramsIt =
315                prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
316        while (bigramsIt.hasNext()) {
317            bigramsIt.next();
318            if (bigramsIt.getBigramPos() == ptNodePos
319                    && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
320                return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
321            }
322        }
323        return NOT_A_PROBABILITY;
324    }
325    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
326}
327
328void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
329        NgramListener *const listener) const {
330    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
331            this /* dictStructurePolicy */);
332    while (bigramsIt.hasNext()) {
333        bigramsIt.next();
334        listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
335    }
336}
337
338int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
339    if (ptNodePos == NOT_A_DICT_POS) {
340        return NOT_A_DICT_POS;
341    }
342    return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
343}
344
345BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode(
346        const int ptNodePos) const {
347    const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
348    return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition);
349}
350
351int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
352    if (ptNodePos == NOT_A_DICT_POS) {
353        return NOT_A_DICT_POS;
354    }
355    return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos();
356}
357
358int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
359        const int ptNodePos, DicNodeVector *childDicNodes) const {
360    PatriciaTrieReadingUtils::NodeFlags flags;
361    int mergedNodeCodePointCount = 0;
362    int mergedNodeCodePoints[MAX_WORD_LENGTH];
363    int probability = NOT_A_PROBABILITY;
364    int childrenPos = NOT_A_DICT_POS;
365    int shortcutPos = NOT_A_DICT_POS;
366    int bigramPos = NOT_A_DICT_POS;
367    int siblingPos = NOT_A_DICT_POS;
368    PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
369            &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
370            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
371    // Skip PtNodes don't start with Unicode code point because they represent non-word information.
372    if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
373        childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
374                PatriciaTrieReadingUtils::isTerminal(flags),
375                PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
376                PatriciaTrieReadingUtils::isBlacklisted(flags)
377                        || PatriciaTrieReadingUtils::isNotAWord(flags),
378                mergedNodeCodePointCount, mergedNodeCodePoints);
379    }
380    return siblingPos;
381}
382
383const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
384        const int codePointCount) const {
385    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
386            false /* forceLowerCaseSearch */);
387    if (ptNodePos == NOT_A_DICT_POS) {
388        AKLOGE("getWordProperty was called for invalid word.");
389        return WordProperty();
390    }
391    const PtNodeParams ptNodeParams =
392            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
393    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
394            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
395    // Fetch bigram information.
396    std::vector<BigramProperty> bigrams;
397    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
398    int bigramWord1CodePoints[MAX_WORD_LENGTH];
399    BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
400    while (bigramsIt.hasNext()) {
401        // Fetch the next bigram information and forward the iterator.
402        bigramsIt.next();
403        // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
404        if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
405            int word1Probability = NOT_A_PROBABILITY;
406            const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
407                    bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
408                    &word1Probability);
409            const std::vector<int> word1(bigramWord1CodePoints,
410                    bigramWord1CodePoints + word1CodePointCount);
411            const int probability = getProbability(word1Probability, bigramsIt.getProbability());
412            bigrams.emplace_back(&word1, probability,
413                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
414        }
415    }
416    // Fetch shortcut information.
417    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
418    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
419    if (shortcutPos != NOT_A_DICT_POS) {
420        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
421        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
422        bool hasNext = true;
423        while (hasNext) {
424            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
425                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
426            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
427            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
428                    mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
429            const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
430                    shortcutTargetCodePoints + shortcutTargetLength);
431            const int shortcutProbability =
432                    ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
433            shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
434        }
435    }
436    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
437            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
438            NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
439    return WordProperty(&codePointVector, &unigramProperty, &bigrams);
440}
441
442int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
443        int *const outCodePointCount) {
444    *outCodePointCount = 0;
445    if (token == 0) {
446        // Start iterating the dictionary.
447        mTerminalPtNodePositionsForIteratingWords.clear();
448        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
449                &mTerminalPtNodePositionsForIteratingWords);
450        DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
451        readingHelper.initWithPtNodeArrayPos(getRootPosition());
452        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
453    }
454    const int terminalPtNodePositionsVectorSize =
455            static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
456    if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
457        AKLOGE("Given token %d is invalid.", token);
458        return 0;
459    }
460    const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
461    int unigramProbability = NOT_A_PROBABILITY;
462    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos,
463            MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
464    const int nextToken = token + 1;
465    if (nextToken >= terminalPtNodePositionsVectorSize) {
466        // All words have been iterated.
467        mTerminalPtNodePositionsForIteratingWords.clear();
468        return 0;
469    }
470    return nextToken;
471}
472
473} // namespace latinime
474