ver4_patricia_trie_policy.cpp revision 96d47fe7457ff1dbea4696a5e0edec2801610d47
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
18
19#include <vector>
20
21#include "suggest/core/dicnode/dic_node.h"
22#include "suggest/core/dicnode/dic_node_vector.h"
23#include "suggest/core/dictionary/property/bigram_property.h"
24#include "suggest/core/dictionary/property/unigram_property.h"
25#include "suggest/core/dictionary/property/word_property.h"
26#include "suggest/core/session/prev_words_info.h"
27#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
28#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
29#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
30#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
31
32namespace latinime {
33
34// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
35// BinaryDictionaryDecayingTests.
36const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
37const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
38const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
39const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
40const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
41const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
42        Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
43
44void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
45        DicNodeVector *const childDicNodes) const {
46    if (!dicNode->hasChildren()) {
47        return;
48    }
49    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
50    readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
51    while (!readingHelper.isEnd()) {
52        const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
53        if (!ptNodeParams.isValid()) {
54            break;
55        }
56        bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
57        if (isTerminal && mHeaderPolicy->isDecayingDict()) {
58            // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
59            // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
60            // valid terminal DicNode.
61            isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
62        }
63        readingHelper.readNextSiblingNode(ptNodeParams);
64        if (ptNodeParams.representsNonWordInfo()) {
65            // Skip PtNodes that represent non-word information.
66            continue;
67        }
68        childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
69                ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
70                ptNodeParams.hasChildren(),
71                ptNodeParams.isBlacklisted()
72                        || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
73                ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
74    }
75    if (readingHelper.isError()) {
76        mIsCorrupted = true;
77        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
78    }
79}
80
81int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
82        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
83        int *const outUnigramProbability) const {
84    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
85    readingHelper.initWithPtNodePos(ptNodePos);
86    const int codePointCount =  readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
87            maxCodePointCount, outCodePoints, outUnigramProbability);
88    if (readingHelper.isError()) {
89        mIsCorrupted = true;
90        AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
91    }
92    return codePointCount;
93}
94
95int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
96        const int length, const bool forceLowerCaseSearch) const {
97    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
98    readingHelper.initWithPtNodeArrayPos(getRootPosition());
99    const int ptNodePos =
100            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
101    if (readingHelper.isError()) {
102        mIsCorrupted = true;
103        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
104    }
105    return ptNodePos;
106}
107
108int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
109        const int bigramProbability) const {
110    if (mHeaderPolicy->isDecayingDict()) {
111        // Both probabilities are encoded. Decode them and get probability.
112        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
113    } else {
114        if (unigramProbability == NOT_A_PROBABILITY) {
115            return NOT_A_PROBABILITY;
116        } else if (bigramProbability == NOT_A_PROBABILITY) {
117            return ProbabilityUtils::backoff(unigramProbability);
118        } else {
119            return bigramProbability;
120        }
121    }
122}
123
124int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
125        const int ptNodePos) const {
126    if (prevWordsInfo) {
127        // TODO: Return probability using prevWordsInfo.
128        return NOT_A_PROBABILITY;
129    }
130    if (ptNodePos == NOT_A_DICT_POS) {
131        return NOT_A_PROBABILITY;
132    }
133    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
134    if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
135        return NOT_A_PROBABILITY;
136    }
137    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
138}
139
140int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
141    if (ptNodePos == NOT_A_DICT_POS) {
142        return NOT_A_DICT_POS;
143    }
144    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
145    if (ptNodeParams.isDeleted()) {
146        return NOT_A_DICT_POS;
147    }
148    return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
149            ptNodeParams.getTerminalId());
150}
151
152BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
153        const int ptNodePos) const {
154    const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
155    return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
156}
157
158int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
159    if (ptNodePos == NOT_A_DICT_POS) {
160        return NOT_A_DICT_POS;
161    }
162    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
163    if (ptNodeParams.isDeleted()) {
164        return NOT_A_DICT_POS;
165    }
166    return mBuffers->getBigramDictContent()->getBigramListHeadPos(
167            ptNodeParams.getTerminalId());
168}
169
170bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
171        const UnigramProperty *const unigramProperty) {
172    if (!mBuffers->isUpdatable()) {
173        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
174        return false;
175    }
176    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
177        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
178                mDictBuffer->getTailPosition());
179        return false;
180    }
181    if (length > MAX_WORD_LENGTH) {
182        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
183        return false;
184    }
185    for (const auto &shortcut : unigramProperty->getShortcuts()) {
186        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
187            AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
188                    shortcut.getTargetCodePoints()->size());
189            return false;
190        }
191    }
192    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
193    readingHelper.initWithPtNodeArrayPos(getRootPosition());
194    bool addedNewUnigram = false;
195    int codePointsToAdd[MAX_WORD_LENGTH];
196    int codePointCountToAdd = length;
197    memmove(codePointsToAdd, word, sizeof(int) * length);
198    if (unigramProperty->representsBeginningOfSentence()) {
199        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
200                codePointCountToAdd, MAX_WORD_LENGTH);
201    }
202    if (codePointCountToAdd <= 0) {
203        return false;
204    }
205    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
206            unigramProperty, &addedNewUnigram)) {
207        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
208            mUnigramCount++;
209        }
210        if (unigramProperty->getShortcuts().size() > 0) {
211            // Add shortcut target.
212            const int wordPos = getTerminalPtNodePositionOfWord(word, length,
213                    false /* forceLowerCaseSearch */);
214            if (wordPos == NOT_A_DICT_POS) {
215                AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
216                return false;
217            }
218            for (const auto &shortcut : unigramProperty->getShortcuts()) {
219                if (!mUpdatingHelper.addShortcutTarget(wordPos,
220                        shortcut.getTargetCodePoints()->data(),
221                        shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
222                    AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
223                            "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
224                            shortcut.getProbability());
225                    return false;
226                }
227            }
228        }
229        return true;
230    } else {
231        return false;
232    }
233}
234
235bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
236    if (!mBuffers->isUpdatable()) {
237        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
238        return false;
239    }
240    const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
241            false /* forceLowerCaseSearch */);
242    if (ptNodePos == NOT_A_DICT_POS) {
243        return false;
244    }
245    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
246    if (!mNodeWriter.markPtNodeAsDeleted(&ptNodeParams)) {
247        AKLOGE("Cannot remove unigram. ptNodePos: %d", ptNodePos);
248        return false;
249    }
250    if (!ptNodeParams.representsNonWordInfo()) {
251        mUnigramCount--;
252    }
253    return true;
254}
255
256bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
257        const BigramProperty *const bigramProperty) {
258    if (!mBuffers->isUpdatable()) {
259        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
260        return false;
261    }
262    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
263        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
264                mDictBuffer->getTailPosition());
265        return false;
266    }
267    if (!prevWordsInfo->isValid()) {
268        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
269        return false;
270    }
271    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
272        AKLOGE("The word is too long to insert the ngram to the dictionary. "
273                "length: %d", bigramProperty->getTargetCodePoints()->size());
274        return false;
275    }
276    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
277    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
278            false /* tryLowerCaseSearch */);
279    // TODO: Support N-gram.
280    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
281        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
282            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
283            const UnigramProperty beginningOfSentenceUnigramProperty(
284                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
285                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
286                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
287            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
288                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
289                    &beginningOfSentenceUnigramProperty)) {
290                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
291                return false;
292            }
293            // Refresh Terminal PtNode positions.
294            prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
295                    false /* tryLowerCaseSearch */);
296        } else {
297            return false;
298        }
299    }
300    const int word1Pos = getTerminalPtNodePositionOfWord(
301            bigramProperty->getTargetCodePoints()->data(),
302            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
303    if (word1Pos == NOT_A_DICT_POS) {
304        return false;
305    }
306    bool addedNewBigram = false;
307    if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
308            &addedNewBigram)) {
309        if (addedNewBigram) {
310            mBigramCount++;
311        }
312        return true;
313    } else {
314        return false;
315    }
316}
317
318bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
319        const int *const word, const int length) {
320    if (!mBuffers->isUpdatable()) {
321        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
322        return false;
323    }
324    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
325        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
326                mDictBuffer->getTailPosition());
327        return false;
328    }
329    if (!prevWordsInfo->isValid()) {
330        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
331        return false;
332    }
333    if (length > MAX_WORD_LENGTH) {
334        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
335    }
336    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
337    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
338            false /* tryLowerCaseSerch */);
339    // TODO: Support N-gram.
340    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
341        return false;
342    }
343    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
344            false /* forceLowerCaseSearch */);
345    if (wordPos == NOT_A_DICT_POS) {
346        return false;
347    }
348    if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
349        mBigramCount--;
350        return true;
351    } else {
352        return false;
353    }
354}
355
356bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
357    if (!mBuffers->isUpdatable()) {
358        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
359        return false;
360    }
361    if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
362        AKLOGE("Cannot flush the dictionary to file.");
363        mIsCorrupted = true;
364        return false;
365    }
366    return true;
367}
368
369bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
370    if (!mBuffers->isUpdatable()) {
371        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
372        return false;
373    }
374    if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
375        AKLOGE("Cannot flush the dictionary to file with GC.");
376        mIsCorrupted = true;
377        return false;
378    }
379    return true;
380}
381
382bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
383    if (!mBuffers->isUpdatable()) {
384        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
385        return false;
386    }
387    if (mBuffers->isNearSizeLimit()) {
388        // Additional buffer size is near the limit.
389        return true;
390    } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
391            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
392        // Total extended region size of the trie exceeds the limit.
393        return true;
394    } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
395            && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
396        // Needs to reduce dictionary size.
397        return true;
398    } else if (mHeaderPolicy->isDecayingDict()) {
399        return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
400                mHeaderPolicy);
401    }
402    return false;
403}
404
405void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
406        char *const outResult, const int maxResultLength) {
407    const int compareLength = queryLength + 1 /* terminator */;
408    if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
409        snprintf(outResult, maxResultLength, "%d", mUnigramCount);
410    } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
411        snprintf(outResult, maxResultLength, "%d", mBigramCount);
412    } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
413        snprintf(outResult, maxResultLength, "%d",
414                mHeaderPolicy->isDecayingDict() ?
415                        ForgettingCurveUtils::getUnigramCountHardLimit(
416                                mHeaderPolicy->getMaxUnigramCount()) :
417                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
418    } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
419        snprintf(outResult, maxResultLength, "%d",
420                mHeaderPolicy->isDecayingDict() ?
421                        ForgettingCurveUtils::getBigramCountHardLimit(
422                                mHeaderPolicy->getMaxBigramCount()) :
423                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
424    }
425}
426
427const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
428        const int codePointCount) const {
429    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
430            false /* forceLowerCaseSearch */);
431    if (ptNodePos == NOT_A_DICT_POS) {
432        AKLOGE("getWordProperty is called for invalid word.");
433        return WordProperty();
434    }
435    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
436    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
437            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
438    const ProbabilityEntry probabilityEntry =
439            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
440                    ptNodeParams.getTerminalId());
441    const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
442    // Fetch bigram information.
443    std::vector<BigramProperty> bigrams;
444    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
445    if (bigramListPos != NOT_A_DICT_POS) {
446        int bigramWord1CodePoints[MAX_WORD_LENGTH];
447        const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
448        const TerminalPositionLookupTable *const terminalPositionLookupTable =
449                mBuffers->getTerminalPositionLookupTable();
450        bool hasNext = true;
451        int readingPos = bigramListPos;
452        while (hasNext) {
453            const BigramEntry bigramEntry =
454                    bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
455            hasNext = bigramEntry.hasNext();
456            const int word1TerminalId = bigramEntry.getTargetTerminalId();
457            const int word1TerminalPtNodePos =
458                    terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
459            if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
460                continue;
461            }
462            // Word (unigram) probability
463            int word1Probability = NOT_A_PROBABILITY;
464            const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
465                    word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
466                    &word1Probability);
467            const std::vector<int> word1(bigramWord1CodePoints,
468                    bigramWord1CodePoints + codePointCount);
469            const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
470            const int probability = bigramEntry.hasHistoricalInfo() ?
471                    ForgettingCurveUtils::decodeProbability(
472                            bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
473                    bigramEntry.getProbability();
474            bigrams.emplace_back(&word1, probability,
475                    historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
476                    historicalInfo->getCount());
477        }
478    }
479    // Fetch shortcut information.
480    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
481    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
482    if (shortcutPos != NOT_A_DICT_POS) {
483        int shortcutTarget[MAX_WORD_LENGTH];
484        const ShortcutDictContent *const shortcutDictContent =
485                mBuffers->getShortcutDictContent();
486        bool hasNext = true;
487        while (hasNext) {
488            int shortcutTargetLength = 0;
489            int shortcutProbability = NOT_A_PROBABILITY;
490            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
491                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
492            const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
493            shortcuts.emplace_back(&target, shortcutProbability);
494        }
495    }
496    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
497            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
498            historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
499            historicalInfo->getCount(), &shortcuts);
500    return WordProperty(&codePointVector, &unigramProperty, &bigrams);
501}
502
503int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
504        int *const outCodePointCount) {
505    *outCodePointCount = 0;
506    if (token == 0) {
507        mTerminalPtNodePositionsForIteratingWords.clear();
508        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
509                &mTerminalPtNodePositionsForIteratingWords);
510        DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
511        readingHelper.initWithPtNodeArrayPos(getRootPosition());
512        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
513    }
514    const int terminalPtNodePositionsVectorSize =
515            static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
516    if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
517        AKLOGE("Given token %d is invalid.", token);
518        return 0;
519    }
520    const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
521    int unigramProbability = NOT_A_PROBABILITY;
522    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
523            terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
524    const int nextToken = token + 1;
525    if (nextToken >= terminalPtNodePositionsVectorSize) {
526        // All words have been iterated.
527        mTerminalPtNodePositionsForIteratingWords.clear();
528        return 0;
529    }
530    return nextToken;
531}
532
533} // namespace latinime
534