ver4_patricia_trie_policy.cpp revision 9069d30043d5182dfd38465ad9bbc11ad73fab7c
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
19 * Do not edit this file other than updating policy's interface.
20 *
21 * This file was generated from
22 *   suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
23 */
24
25#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
26
27#include <vector>
28
29#include "suggest/core/dicnode/dic_node.h"
30#include "suggest/core/dicnode/dic_node_vector.h"
31#include "suggest/core/dictionary/ngram_listener.h"
32#include "suggest/core/dictionary/property/bigram_property.h"
33#include "suggest/core/dictionary/property/unigram_property.h"
34#include "suggest/core/dictionary/property/word_property.h"
35#include "suggest/core/session/prev_words_info.h"
36#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
37#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
38#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
39#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
40
41namespace latinime {
42namespace backward {
43namespace v402 {
44
45// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
46// BinaryDictionaryDecayingTests.
47const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
48const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
49const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
50const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
51const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
52const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
53        Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
54
55void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
56        DicNodeVector *const childDicNodes) const {
57    if (!dicNode->hasChildren()) {
58        return;
59    }
60    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
61    readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
62    while (!readingHelper.isEnd()) {
63        const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
64        if (!ptNodeParams.isValid()) {
65            break;
66        }
67        bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
68        if (isTerminal && mHeaderPolicy->isDecayingDict()) {
69            // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
70            // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
71            // valid terminal DicNode.
72            isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
73        }
74        readingHelper.readNextSiblingNode(ptNodeParams);
75        if (ptNodeParams.representsNonWordInfo()) {
76            // Skip PtNodes that represent non-word information.
77            continue;
78        }
79        childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
80                ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
81                ptNodeParams.hasChildren(),
82                ptNodeParams.isBlacklisted()
83                        || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
84                ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
85    }
86    if (readingHelper.isError()) {
87        mIsCorrupted = true;
88        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
89    }
90}
91
92int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
93        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
94        int *const outUnigramProbability) const {
95    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
96    readingHelper.initWithPtNodePos(ptNodePos);
97    const int codePointCount =  readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
98            maxCodePointCount, outCodePoints, outUnigramProbability);
99    if (readingHelper.isError()) {
100        mIsCorrupted = true;
101        AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
102    }
103    return codePointCount;
104}
105
106int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
107        const int length, const bool forceLowerCaseSearch) const {
108    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
109    readingHelper.initWithPtNodeArrayPos(getRootPosition());
110    const int ptNodePos =
111            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
112    if (readingHelper.isError()) {
113        mIsCorrupted = true;
114        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
115    }
116    return ptNodePos;
117}
118
119int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
120        const int bigramProbability) const {
121    if (mHeaderPolicy->isDecayingDict()) {
122        // Both probabilities are encoded. Decode them and get probability.
123        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
124    } else {
125        if (unigramProbability == NOT_A_PROBABILITY) {
126            return NOT_A_PROBABILITY;
127        } else if (bigramProbability == NOT_A_PROBABILITY) {
128            return ProbabilityUtils::backoff(unigramProbability);
129        } else {
130            return bigramProbability;
131        }
132    }
133}
134
135int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
136        const int ptNodePos) const {
137    if (ptNodePos == NOT_A_DICT_POS) {
138        return NOT_A_PROBABILITY;
139    }
140    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
141    if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
142        return NOT_A_PROBABILITY;
143    }
144    if (prevWordsPtNodePos) {
145        const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
146        BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
147        while (bigramsIt.hasNext()) {
148            bigramsIt.next();
149            if (bigramsIt.getBigramPos() == ptNodePos
150                    && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
151                return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
152            }
153        }
154        return NOT_A_PROBABILITY;
155    }
156    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
157}
158
159void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
160        NgramListener *const listener) const {
161    if (!prevWordsPtNodePos) {
162        return;
163    }
164    const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
165    BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
166    while (bigramsIt.hasNext()) {
167        bigramsIt.next();
168        listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
169    }
170}
171
172int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
173    if (ptNodePos == NOT_A_DICT_POS) {
174        return NOT_A_DICT_POS;
175    }
176    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
177    if (ptNodeParams.isDeleted()) {
178        return NOT_A_DICT_POS;
179    }
180    return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
181            ptNodeParams.getTerminalId());
182}
183
184int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
185    if (ptNodePos == NOT_A_DICT_POS) {
186        return NOT_A_DICT_POS;
187    }
188    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
189    if (ptNodeParams.isDeleted()) {
190        return NOT_A_DICT_POS;
191    }
192    return mBuffers->getBigramDictContent()->getBigramListHeadPos(
193            ptNodeParams.getTerminalId());
194}
195
196bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
197        const UnigramProperty *const unigramProperty) {
198    if (!mBuffers->isUpdatable()) {
199        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
200        return false;
201    }
202    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
203        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
204                mDictBuffer->getTailPosition());
205        return false;
206    }
207    if (length > MAX_WORD_LENGTH) {
208        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
209        return false;
210    }
211    for (const auto &shortcut : unigramProperty->getShortcuts()) {
212        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
213            AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
214                    shortcut.getTargetCodePoints()->size());
215            return false;
216        }
217    }
218    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
219    readingHelper.initWithPtNodeArrayPos(getRootPosition());
220    bool addedNewUnigram = false;
221    int codePointsToAdd[MAX_WORD_LENGTH];
222    int codePointCountToAdd = length;
223    memmove(codePointsToAdd, word, sizeof(int) * length);
224    if (unigramProperty->representsBeginningOfSentence()) {
225        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
226                codePointCountToAdd, MAX_WORD_LENGTH);
227    }
228    if (codePointCountToAdd <= 0) {
229        return false;
230    }
231    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
232            unigramProperty, &addedNewUnigram)) {
233        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
234            mUnigramCount++;
235        }
236        if (unigramProperty->getShortcuts().size() > 0) {
237            // Add shortcut target.
238            const int wordPos = getTerminalPtNodePositionOfWord(word, length,
239                    false /* forceLowerCaseSearch */);
240            if (wordPos == NOT_A_DICT_POS) {
241                AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
242                return false;
243            }
244            for (const auto &shortcut : unigramProperty->getShortcuts()) {
245                if (!mUpdatingHelper.addShortcutTarget(wordPos,
246                        shortcut.getTargetCodePoints()->data(),
247                        shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
248                    AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
249                            "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
250                            shortcut.getProbability());
251                    return false;
252                }
253            }
254        }
255        return true;
256    } else {
257        return false;
258    }
259}
260
261bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
262        const BigramProperty *const bigramProperty) {
263    if (!mBuffers->isUpdatable()) {
264        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
265        return false;
266    }
267    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
268        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
269                mDictBuffer->getTailPosition());
270        return false;
271    }
272    if (!prevWordsInfo->isValid()) {
273        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
274        return false;
275    }
276    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
277        AKLOGE("The word is too long to insert the ngram to the dictionary. "
278                "length: %d", bigramProperty->getTargetCodePoints()->size());
279        return false;
280    }
281    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
282    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
283            false /* tryLowerCaseSearch */);
284    // TODO: Support N-gram.
285    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
286        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
287            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
288            const UnigramProperty beginningOfSentenceUnigramProperty(
289                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
290                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
291                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
292            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
293                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
294                    &beginningOfSentenceUnigramProperty)) {
295                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
296                return false;
297            }
298            // Refresh Terminal PtNode positions.
299            prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
300                    false /* tryLowerCaseSearch */);
301        } else {
302            return false;
303        }
304    }
305    const int word1Pos = getTerminalPtNodePositionOfWord(
306            bigramProperty->getTargetCodePoints()->data(),
307            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
308    if (word1Pos == NOT_A_DICT_POS) {
309        return false;
310    }
311    bool addedNewBigram = false;
312    if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
313            word1Pos, bigramProperty, &addedNewBigram)) {
314        if (addedNewBigram) {
315            mBigramCount++;
316        }
317        return true;
318    } else {
319        return false;
320    }
321}
322
323bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
324        const int *const word, const int length) {
325    if (!mBuffers->isUpdatable()) {
326        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
327        return false;
328    }
329    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
330        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
331                mDictBuffer->getTailPosition());
332        return false;
333    }
334    if (!prevWordsInfo->isValid()) {
335        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
336        return false;
337    }
338    if (length > MAX_WORD_LENGTH) {
339        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
340    }
341    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
342    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
343            false /* tryLowerCaseSerch */);
344    // TODO: Support N-gram.
345    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
346        return false;
347    }
348    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
349            false /* forceLowerCaseSearch */);
350    if (wordPos == NOT_A_DICT_POS) {
351        return false;
352    }
353    if (mUpdatingHelper.removeNgramEntry(
354            PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
355        mBigramCount--;
356        return true;
357    } else {
358        return false;
359    }
360}
361
362bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
363    if (!mBuffers->isUpdatable()) {
364        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
365        return false;
366    }
367    if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
368        AKLOGE("Cannot flush the dictionary to file.");
369        mIsCorrupted = true;
370        return false;
371    }
372    return true;
373}
374
375bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
376    if (!mBuffers->isUpdatable()) {
377        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
378        return false;
379    }
380    if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
381        AKLOGE("Cannot flush the dictionary to file with GC.");
382        mIsCorrupted = true;
383        return false;
384    }
385    return true;
386}
387
388bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
389    if (!mBuffers->isUpdatable()) {
390        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
391        return false;
392    }
393    if (mBuffers->isNearSizeLimit()) {
394        // Additional buffer size is near the limit.
395        return true;
396    } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
397            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
398        // Total extended region size of the trie exceeds the limit.
399        return true;
400    } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
401            && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
402        // Needs to reduce dictionary size.
403        return true;
404    } else if (mHeaderPolicy->isDecayingDict()) {
405        return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
406                mHeaderPolicy);
407    }
408    return false;
409}
410
411void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
412        char *const outResult, const int maxResultLength) {
413    const int compareLength = queryLength + 1 /* terminator */;
414    if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
415        snprintf(outResult, maxResultLength, "%d", mUnigramCount);
416    } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
417        snprintf(outResult, maxResultLength, "%d", mBigramCount);
418    } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
419        snprintf(outResult, maxResultLength, "%d",
420                mHeaderPolicy->isDecayingDict() ?
421                        ForgettingCurveUtils::getUnigramCountHardLimit(
422                                mHeaderPolicy->getMaxUnigramCount()) :
423                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
424    } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
425        snprintf(outResult, maxResultLength, "%d",
426                mHeaderPolicy->isDecayingDict() ?
427                        ForgettingCurveUtils::getBigramCountHardLimit(
428                                mHeaderPolicy->getMaxBigramCount()) :
429                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
430    }
431}
432
433const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
434        const int codePointCount) const {
435    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
436            false /* forceLowerCaseSearch */);
437    if (ptNodePos == NOT_A_DICT_POS) {
438        AKLOGE("getWordProperty is called for invalid word.");
439        return WordProperty();
440    }
441    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
442    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
443            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
444    const ProbabilityEntry probabilityEntry =
445            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
446                    ptNodeParams.getTerminalId());
447    const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
448    // Fetch bigram information.
449    std::vector<BigramProperty> bigrams;
450    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
451    if (bigramListPos != NOT_A_DICT_POS) {
452        int bigramWord1CodePoints[MAX_WORD_LENGTH];
453        const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
454        const TerminalPositionLookupTable *const terminalPositionLookupTable =
455                mBuffers->getTerminalPositionLookupTable();
456        bool hasNext = true;
457        int readingPos = bigramListPos;
458        while (hasNext) {
459            const BigramEntry bigramEntry =
460                    bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
461            hasNext = bigramEntry.hasNext();
462            const int word1TerminalId = bigramEntry.getTargetTerminalId();
463            const int word1TerminalPtNodePos =
464                    terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
465            if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
466                continue;
467            }
468            // Word (unigram) probability
469            int word1Probability = NOT_A_PROBABILITY;
470            const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
471                    word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
472                    &word1Probability);
473            const std::vector<int> word1(bigramWord1CodePoints,
474                    bigramWord1CodePoints + codePointCount);
475            const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
476            const int probability = bigramEntry.hasHistoricalInfo() ?
477                    ForgettingCurveUtils::decodeProbability(
478                            bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
479                    bigramEntry.getProbability();
480            bigrams.emplace_back(&word1, probability,
481                    historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
482                    historicalInfo->getCount());
483        }
484    }
485    // Fetch shortcut information.
486    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
487    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
488    if (shortcutPos != NOT_A_DICT_POS) {
489        int shortcutTarget[MAX_WORD_LENGTH];
490        const ShortcutDictContent *const shortcutDictContent =
491                mBuffers->getShortcutDictContent();
492        bool hasNext = true;
493        while (hasNext) {
494            int shortcutTargetLength = 0;
495            int shortcutProbability = NOT_A_PROBABILITY;
496            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
497                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
498            const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
499            shortcuts.emplace_back(&target, shortcutProbability);
500        }
501    }
502    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
503            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
504            historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
505            historicalInfo->getCount(), &shortcuts);
506    return WordProperty(&codePointVector, &unigramProperty, &bigrams);
507}
508
509int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
510        int *const outCodePointCount) {
511    *outCodePointCount = 0;
512    if (token == 0) {
513        mTerminalPtNodePositionsForIteratingWords.clear();
514        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
515                &mTerminalPtNodePositionsForIteratingWords);
516        DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
517        readingHelper.initWithPtNodeArrayPos(getRootPosition());
518        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
519    }
520    const int terminalPtNodePositionsVectorSize =
521            static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
522    if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
523        AKLOGE("Given token %d is invalid.", token);
524        return 0;
525    }
526    const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
527    int unigramProbability = NOT_A_PROBABILITY;
528    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
529            terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
530    const int nextToken = token + 1;
531    if (nextToken >= terminalPtNodePositionsVectorSize) {
532        // All words have been iterated.
533        mTerminalPtNodePositionsForIteratingWords.clear();
534        return 0;
535    }
536    return nextToken;
537}
538
539} // namespace v402
540} // namespace backward
541} // namespace latinime
542