ver4_patricia_trie_policy.cpp revision bd1f59bda5ad0b7028ec06c2de078f1623e76cdd
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
19 * Do not edit this file other than updating policy's interface.
20 *
21 * This file was generated from
22 *   suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
23 */
24
25#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
26
27#include <vector>
28
29#include "suggest/core/dicnode/dic_node.h"
30#include "suggest/core/dicnode/dic_node_vector.h"
31#include "suggest/core/dictionary/ngram_listener.h"
32#include "suggest/core/dictionary/property/bigram_property.h"
33#include "suggest/core/dictionary/property/unigram_property.h"
34#include "suggest/core/dictionary/property/word_property.h"
35#include "suggest/core/session/prev_words_info.h"
36#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
37#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
38#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
39#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
40
41namespace latinime {
42namespace backward {
43namespace v402 {
44
45// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
46// BinaryDictionaryDecayingTests.
47const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
48const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
49const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
50const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
51const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
52const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
53        Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
54
55void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
56        DicNodeVector *const childDicNodes) const {
57    if (!dicNode->hasChildren()) {
58        return;
59    }
60    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
61    readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
62    while (!readingHelper.isEnd()) {
63        const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
64        if (!ptNodeParams.isValid()) {
65            break;
66        }
67        bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
68        if (isTerminal && mHeaderPolicy->isDecayingDict()) {
69            // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
70            // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
71            // valid terminal DicNode.
72            isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
73        }
74        readingHelper.readNextSiblingNode(ptNodeParams);
75        if (ptNodeParams.representsNonWordInfo()) {
76            // Skip PtNodes that represent non-word information.
77            continue;
78        }
79        childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
80                ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
81                ptNodeParams.hasChildren(),
82                ptNodeParams.isBlacklisted()
83                        || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
84                ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
85    }
86    if (readingHelper.isError()) {
87        mIsCorrupted = true;
88        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
89    }
90}
91
92int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
93        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
94        int *const outUnigramProbability) const {
95    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
96    readingHelper.initWithPtNodePos(ptNodePos);
97    const int codePointCount =  readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
98            maxCodePointCount, outCodePoints, outUnigramProbability);
99    if (readingHelper.isError()) {
100        mIsCorrupted = true;
101        AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
102    }
103    return codePointCount;
104}
105
106int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
107        const int length, const bool forceLowerCaseSearch) const {
108    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
109    readingHelper.initWithPtNodeArrayPos(getRootPosition());
110    const int ptNodePos =
111            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
112    if (readingHelper.isError()) {
113        mIsCorrupted = true;
114        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
115    }
116    return ptNodePos;
117}
118
119int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
120        const int bigramProbability) const {
121    if (mHeaderPolicy->isDecayingDict()) {
122        // Both probabilities are encoded. Decode them and get probability.
123        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
124    } else {
125        if (unigramProbability == NOT_A_PROBABILITY) {
126            return NOT_A_PROBABILITY;
127        } else if (bigramProbability == NOT_A_PROBABILITY) {
128            return ProbabilityUtils::backoff(unigramProbability);
129        } else {
130            return bigramProbability;
131        }
132    }
133}
134
135int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
136        const int ptNodePos) const {
137    if (ptNodePos == NOT_A_DICT_POS) {
138        return NOT_A_PROBABILITY;
139    }
140    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
141    if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
142        return NOT_A_PROBABILITY;
143    }
144    if (prevWordsPtNodePos) {
145        BinaryDictionaryBigramsIterator bigramsIt =
146                getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
147        while (bigramsIt.hasNext()) {
148            bigramsIt.next();
149            if (bigramsIt.getBigramPos() == ptNodePos
150                    && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
151                return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
152            }
153        }
154        return NOT_A_PROBABILITY;
155    }
156    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
157}
158
159void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
160        NgramListener *const listener) const {
161    if (!prevWordsPtNodePos) {
162        return;
163    }
164    BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
165    while (bigramsIt.hasNext()) {
166        bigramsIt.next();
167        listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
168    }
169}
170
171int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
172    if (ptNodePos == NOT_A_DICT_POS) {
173        return NOT_A_DICT_POS;
174    }
175    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
176    if (ptNodeParams.isDeleted()) {
177        return NOT_A_DICT_POS;
178    }
179    return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
180            ptNodeParams.getTerminalId());
181}
182
183BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
184        const int ptNodePos) const {
185    const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
186    return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
187}
188
189int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
190    if (ptNodePos == NOT_A_DICT_POS) {
191        return NOT_A_DICT_POS;
192    }
193    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
194    if (ptNodeParams.isDeleted()) {
195        return NOT_A_DICT_POS;
196    }
197    return mBuffers->getBigramDictContent()->getBigramListHeadPos(
198            ptNodeParams.getTerminalId());
199}
200
201bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
202        const UnigramProperty *const unigramProperty) {
203    if (!mBuffers->isUpdatable()) {
204        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
205        return false;
206    }
207    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
208        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
209                mDictBuffer->getTailPosition());
210        return false;
211    }
212    if (length > MAX_WORD_LENGTH) {
213        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
214        return false;
215    }
216    for (const auto &shortcut : unigramProperty->getShortcuts()) {
217        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
218            AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
219                    shortcut.getTargetCodePoints()->size());
220            return false;
221        }
222    }
223    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
224    readingHelper.initWithPtNodeArrayPos(getRootPosition());
225    bool addedNewUnigram = false;
226    int codePointsToAdd[MAX_WORD_LENGTH];
227    int codePointCountToAdd = length;
228    memmove(codePointsToAdd, word, sizeof(int) * length);
229    if (unigramProperty->representsBeginningOfSentence()) {
230        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
231                codePointCountToAdd, MAX_WORD_LENGTH);
232    }
233    if (codePointCountToAdd <= 0) {
234        return false;
235    }
236    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
237            unigramProperty, &addedNewUnigram)) {
238        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
239            mUnigramCount++;
240        }
241        if (unigramProperty->getShortcuts().size() > 0) {
242            // Add shortcut target.
243            const int wordPos = getTerminalPtNodePositionOfWord(word, length,
244                    false /* forceLowerCaseSearch */);
245            if (wordPos == NOT_A_DICT_POS) {
246                AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
247                return false;
248            }
249            for (const auto &shortcut : unigramProperty->getShortcuts()) {
250                if (!mUpdatingHelper.addShortcutTarget(wordPos,
251                        shortcut.getTargetCodePoints()->data(),
252                        shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
253                    AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
254                            "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
255                            shortcut.getProbability());
256                    return false;
257                }
258            }
259        }
260        return true;
261    } else {
262        return false;
263    }
264}
265
266bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
267        const BigramProperty *const bigramProperty) {
268    if (!mBuffers->isUpdatable()) {
269        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
270        return false;
271    }
272    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
273        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
274                mDictBuffer->getTailPosition());
275        return false;
276    }
277    if (!prevWordsInfo->isValid()) {
278        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
279        return false;
280    }
281    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
282        AKLOGE("The word is too long to insert the ngram to the dictionary. "
283                "length: %d", bigramProperty->getTargetCodePoints()->size());
284        return false;
285    }
286    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
287    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
288            false /* tryLowerCaseSearch */);
289    // TODO: Support N-gram.
290    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
291        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
292            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
293            const UnigramProperty beginningOfSentenceUnigramProperty(
294                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
295                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
296                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
297            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
298                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
299                    &beginningOfSentenceUnigramProperty)) {
300                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
301                return false;
302            }
303            // Refresh Terminal PtNode positions.
304            prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
305                    false /* tryLowerCaseSearch */);
306        } else {
307            return false;
308        }
309    }
310    const int word1Pos = getTerminalPtNodePositionOfWord(
311            bigramProperty->getTargetCodePoints()->data(),
312            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
313    if (word1Pos == NOT_A_DICT_POS) {
314        return false;
315    }
316    bool addedNewBigram = false;
317    if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
318            &addedNewBigram)) {
319        if (addedNewBigram) {
320            mBigramCount++;
321        }
322        return true;
323    } else {
324        return false;
325    }
326}
327
328bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
329        const int *const word, const int length) {
330    if (!mBuffers->isUpdatable()) {
331        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
332        return false;
333    }
334    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
335        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
336                mDictBuffer->getTailPosition());
337        return false;
338    }
339    if (!prevWordsInfo->isValid()) {
340        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
341        return false;
342    }
343    if (length > MAX_WORD_LENGTH) {
344        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
345    }
346    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
347    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
348            false /* tryLowerCaseSerch */);
349    // TODO: Support N-gram.
350    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
351        return false;
352    }
353    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
354            false /* forceLowerCaseSearch */);
355    if (wordPos == NOT_A_DICT_POS) {
356        return false;
357    }
358    if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
359        mBigramCount--;
360        return true;
361    } else {
362        return false;
363    }
364}
365
366bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
367    if (!mBuffers->isUpdatable()) {
368        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
369        return false;
370    }
371    if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
372        AKLOGE("Cannot flush the dictionary to file.");
373        mIsCorrupted = true;
374        return false;
375    }
376    return true;
377}
378
379bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
380    if (!mBuffers->isUpdatable()) {
381        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
382        return false;
383    }
384    if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
385        AKLOGE("Cannot flush the dictionary to file with GC.");
386        mIsCorrupted = true;
387        return false;
388    }
389    return true;
390}
391
392bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
393    if (!mBuffers->isUpdatable()) {
394        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
395        return false;
396    }
397    if (mBuffers->isNearSizeLimit()) {
398        // Additional buffer size is near the limit.
399        return true;
400    } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
401            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
402        // Total extended region size of the trie exceeds the limit.
403        return true;
404    } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
405            && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
406        // Needs to reduce dictionary size.
407        return true;
408    } else if (mHeaderPolicy->isDecayingDict()) {
409        return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
410                mHeaderPolicy);
411    }
412    return false;
413}
414
415void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
416        char *const outResult, const int maxResultLength) {
417    const int compareLength = queryLength + 1 /* terminator */;
418    if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
419        snprintf(outResult, maxResultLength, "%d", mUnigramCount);
420    } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
421        snprintf(outResult, maxResultLength, "%d", mBigramCount);
422    } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
423        snprintf(outResult, maxResultLength, "%d",
424                mHeaderPolicy->isDecayingDict() ?
425                        ForgettingCurveUtils::getUnigramCountHardLimit(
426                                mHeaderPolicy->getMaxUnigramCount()) :
427                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
428    } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
429        snprintf(outResult, maxResultLength, "%d",
430                mHeaderPolicy->isDecayingDict() ?
431                        ForgettingCurveUtils::getBigramCountHardLimit(
432                                mHeaderPolicy->getMaxBigramCount()) :
433                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
434    }
435}
436
437const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
438        const int codePointCount) const {
439    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
440            false /* forceLowerCaseSearch */);
441    if (ptNodePos == NOT_A_DICT_POS) {
442        AKLOGE("getWordProperty is called for invalid word.");
443        return WordProperty();
444    }
445    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
446    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
447            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
448    const ProbabilityEntry probabilityEntry =
449            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
450                    ptNodeParams.getTerminalId());
451    const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
452    // Fetch bigram information.
453    std::vector<BigramProperty> bigrams;
454    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
455    if (bigramListPos != NOT_A_DICT_POS) {
456        int bigramWord1CodePoints[MAX_WORD_LENGTH];
457        const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
458        const TerminalPositionLookupTable *const terminalPositionLookupTable =
459                mBuffers->getTerminalPositionLookupTable();
460        bool hasNext = true;
461        int readingPos = bigramListPos;
462        while (hasNext) {
463            const BigramEntry bigramEntry =
464                    bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
465            hasNext = bigramEntry.hasNext();
466            const int word1TerminalId = bigramEntry.getTargetTerminalId();
467            const int word1TerminalPtNodePos =
468                    terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
469            if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
470                continue;
471            }
472            // Word (unigram) probability
473            int word1Probability = NOT_A_PROBABILITY;
474            const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
475                    word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
476                    &word1Probability);
477            const std::vector<int> word1(bigramWord1CodePoints,
478                    bigramWord1CodePoints + codePointCount);
479            const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
480            const int probability = bigramEntry.hasHistoricalInfo() ?
481                    ForgettingCurveUtils::decodeProbability(
482                            bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
483                    bigramEntry.getProbability();
484            bigrams.emplace_back(&word1, probability,
485                    historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
486                    historicalInfo->getCount());
487        }
488    }
489    // Fetch shortcut information.
490    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
491    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
492    if (shortcutPos != NOT_A_DICT_POS) {
493        int shortcutTarget[MAX_WORD_LENGTH];
494        const ShortcutDictContent *const shortcutDictContent =
495                mBuffers->getShortcutDictContent();
496        bool hasNext = true;
497        while (hasNext) {
498            int shortcutTargetLength = 0;
499            int shortcutProbability = NOT_A_PROBABILITY;
500            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
501                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
502            const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
503            shortcuts.emplace_back(&target, shortcutProbability);
504        }
505    }
506    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
507            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
508            historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
509            historicalInfo->getCount(), &shortcuts);
510    return WordProperty(&codePointVector, &unigramProperty, &bigrams);
511}
512
513int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
514        int *const outCodePointCount) {
515    *outCodePointCount = 0;
516    if (token == 0) {
517        mTerminalPtNodePositionsForIteratingWords.clear();
518        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
519                &mTerminalPtNodePositionsForIteratingWords);
520        DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
521        readingHelper.initWithPtNodeArrayPos(getRootPosition());
522        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
523    }
524    const int terminalPtNodePositionsVectorSize =
525            static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
526    if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
527        AKLOGE("Given token %d is invalid.", token);
528        return 0;
529    }
530    const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
531    int unigramProbability = NOT_A_PROBABILITY;
532    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
533            terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
534    const int nextToken = token + 1;
535    if (nextToken >= terminalPtNodePositionsVectorSize) {
536        // All words have been iterated.
537        mTerminalPtNodePositionsForIteratingWords.clear();
538        return 0;
539    }
540    return nextToken;
541}
542
543} // namespace v402
544} // namespace backward
545} // namespace latinime
546