ver4_patricia_trie_policy.cpp revision 9069d30043d5182dfd38465ad9bbc11ad73fab7c
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! 19 * Do not edit this file other than updating policy's interface. 20 * 21 * This file was generated from 22 * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 23 */ 24 25#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h" 26 27#include <vector> 28 29#include "suggest/core/dicnode/dic_node.h" 30#include "suggest/core/dicnode/dic_node_vector.h" 31#include "suggest/core/dictionary/ngram_listener.h" 32#include "suggest/core/dictionary/property/bigram_property.h" 33#include "suggest/core/dictionary/property/unigram_property.h" 34#include "suggest/core/dictionary/property/word_property.h" 35#include "suggest/core/session/prev_words_info.h" 36#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" 37#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" 38#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 39#include "suggest/policyimpl/dictionary/utils/probability_utils.h" 40 41namespace latinime { 42namespace backward { 43namespace v402 { 44 45// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and 46// BinaryDictionaryDecayingTests. 47const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 48const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 49const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 50const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 51const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; 52const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = 53 Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; 54 55void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, 56 DicNodeVector *const childDicNodes) const { 57 if (!dicNode->hasChildren()) { 58 return; 59 } 60 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 61 readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); 62 while (!readingHelper.isEnd()) { 63 const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); 64 if (!ptNodeParams.isValid()) { 65 break; 66 } 67 bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); 68 if (isTerminal && mHeaderPolicy->isDecayingDict()) { 69 // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose 70 // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a 71 // valid terminal DicNode. 72 isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; 73 } 74 readingHelper.readNextSiblingNode(ptNodeParams); 75 if (ptNodeParams.representsNonWordInfo()) { 76 // Skip PtNodes that represent non-word information. 77 continue; 78 } 79 childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), 80 ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, 81 ptNodeParams.hasChildren(), 82 ptNodeParams.isBlacklisted() 83 || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, 84 ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); 85 } 86 if (readingHelper.isError()) { 87 mIsCorrupted = true; 88 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 89 } 90} 91 92int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 93 const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, 94 int *const outUnigramProbability) const { 95 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 96 readingHelper.initWithPtNodePos(ptNodePos); 97 const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( 98 maxCodePointCount, outCodePoints, outUnigramProbability); 99 if (readingHelper.isError()) { 100 mIsCorrupted = true; 101 AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount()."); 102 } 103 return codePointCount; 104} 105 106int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, 107 const int length, const bool forceLowerCaseSearch) const { 108 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 109 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 110 const int ptNodePos = 111 readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); 112 if (readingHelper.isError()) { 113 mIsCorrupted = true; 114 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 115 } 116 return ptNodePos; 117} 118 119int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, 120 const int bigramProbability) const { 121 if (mHeaderPolicy->isDecayingDict()) { 122 // Both probabilities are encoded. Decode them and get probability. 123 return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); 124 } else { 125 if (unigramProbability == NOT_A_PROBABILITY) { 126 return NOT_A_PROBABILITY; 127 } else if (bigramProbability == NOT_A_PROBABILITY) { 128 return ProbabilityUtils::backoff(unigramProbability); 129 } else { 130 return bigramProbability; 131 } 132 } 133} 134 135int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos, 136 const int ptNodePos) const { 137 if (ptNodePos == NOT_A_DICT_POS) { 138 return NOT_A_PROBABILITY; 139 } 140 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 141 if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { 142 return NOT_A_PROBABILITY; 143 } 144 if (prevWordsPtNodePos) { 145 const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); 146 BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); 147 while (bigramsIt.hasNext()) { 148 bigramsIt.next(); 149 if (bigramsIt.getBigramPos() == ptNodePos 150 && bigramsIt.getProbability() != NOT_A_PROBABILITY) { 151 return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); 152 } 153 } 154 return NOT_A_PROBABILITY; 155 } 156 return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); 157} 158 159void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos, 160 NgramListener *const listener) const { 161 if (!prevWordsPtNodePos) { 162 return; 163 } 164 const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); 165 BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); 166 while (bigramsIt.hasNext()) { 167 bigramsIt.next(); 168 listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); 169 } 170} 171 172int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 173 if (ptNodePos == NOT_A_DICT_POS) { 174 return NOT_A_DICT_POS; 175 } 176 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 177 if (ptNodeParams.isDeleted()) { 178 return NOT_A_DICT_POS; 179 } 180 return mBuffers->getShortcutDictContent()->getShortcutListHeadPos( 181 ptNodeParams.getTerminalId()); 182} 183 184int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 185 if (ptNodePos == NOT_A_DICT_POS) { 186 return NOT_A_DICT_POS; 187 } 188 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 189 if (ptNodeParams.isDeleted()) { 190 return NOT_A_DICT_POS; 191 } 192 return mBuffers->getBigramDictContent()->getBigramListHeadPos( 193 ptNodeParams.getTerminalId()); 194} 195 196bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, 197 const UnigramProperty *const unigramProperty) { 198 if (!mBuffers->isUpdatable()) { 199 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 200 return false; 201 } 202 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 203 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 204 mDictBuffer->getTailPosition()); 205 return false; 206 } 207 if (length > MAX_WORD_LENGTH) { 208 AKLOGE("The word is too long to insert to the dictionary, length: %d", length); 209 return false; 210 } 211 for (const auto &shortcut : unigramProperty->getShortcuts()) { 212 if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) { 213 AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d", 214 shortcut.getTargetCodePoints()->size()); 215 return false; 216 } 217 } 218 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 219 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 220 bool addedNewUnigram = false; 221 int codePointsToAdd[MAX_WORD_LENGTH]; 222 int codePointCountToAdd = length; 223 memmove(codePointsToAdd, word, sizeof(int) * length); 224 if (unigramProperty->representsBeginningOfSentence()) { 225 codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, 226 codePointCountToAdd, MAX_WORD_LENGTH); 227 } 228 if (codePointCountToAdd <= 0) { 229 return false; 230 } 231 if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd, 232 unigramProperty, &addedNewUnigram)) { 233 if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { 234 mUnigramCount++; 235 } 236 if (unigramProperty->getShortcuts().size() > 0) { 237 // Add shortcut target. 238 const int wordPos = getTerminalPtNodePositionOfWord(word, length, 239 false /* forceLowerCaseSearch */); 240 if (wordPos == NOT_A_DICT_POS) { 241 AKLOGE("Cannot find terminal PtNode position to add shortcut target."); 242 return false; 243 } 244 for (const auto &shortcut : unigramProperty->getShortcuts()) { 245 if (!mUpdatingHelper.addShortcutTarget(wordPos, 246 shortcut.getTargetCodePoints()->data(), 247 shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) { 248 AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, " 249 "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), 250 shortcut.getProbability()); 251 return false; 252 } 253 } 254 } 255 return true; 256 } else { 257 return false; 258 } 259} 260 261bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 262 const BigramProperty *const bigramProperty) { 263 if (!mBuffers->isUpdatable()) { 264 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 265 return false; 266 } 267 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 268 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 269 mDictBuffer->getTailPosition()); 270 return false; 271 } 272 if (!prevWordsInfo->isValid()) { 273 AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); 274 return false; 275 } 276 if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { 277 AKLOGE("The word is too long to insert the ngram to the dictionary. " 278 "length: %d", bigramProperty->getTargetCodePoints()->size()); 279 return false; 280 } 281 int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 282 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 283 false /* tryLowerCaseSearch */); 284 // TODO: Support N-gram. 285 if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { 286 if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { 287 const std::vector<UnigramProperty::ShortcutProperty> shortcuts; 288 const UnigramProperty beginningOfSentenceUnigramProperty( 289 true /* representsBeginningOfSentence */, true /* isNotAWord */, 290 false /* isBlacklisted */, MAX_PROBABILITY /* probability */, 291 NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); 292 if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), 293 prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */), 294 &beginningOfSentenceUnigramProperty)) { 295 AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); 296 return false; 297 } 298 // Refresh Terminal PtNode positions. 299 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 300 false /* tryLowerCaseSearch */); 301 } else { 302 return false; 303 } 304 } 305 const int word1Pos = getTerminalPtNodePositionOfWord( 306 bigramProperty->getTargetCodePoints()->data(), 307 bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */); 308 if (word1Pos == NOT_A_DICT_POS) { 309 return false; 310 } 311 bool addedNewBigram = false; 312 if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos), 313 word1Pos, bigramProperty, &addedNewBigram)) { 314 if (addedNewBigram) { 315 mBigramCount++; 316 } 317 return true; 318 } else { 319 return false; 320 } 321} 322 323bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, 324 const int *const word, const int length) { 325 if (!mBuffers->isUpdatable()) { 326 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 327 return false; 328 } 329 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 330 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 331 mDictBuffer->getTailPosition()); 332 return false; 333 } 334 if (!prevWordsInfo->isValid()) { 335 AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); 336 return false; 337 } 338 if (length > MAX_WORD_LENGTH) { 339 AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length); 340 } 341 int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 342 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 343 false /* tryLowerCaseSerch */); 344 // TODO: Support N-gram. 345 if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { 346 return false; 347 } 348 const int wordPos = getTerminalPtNodePositionOfWord(word, length, 349 false /* forceLowerCaseSearch */); 350 if (wordPos == NOT_A_DICT_POS) { 351 return false; 352 } 353 if (mUpdatingHelper.removeNgramEntry( 354 PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) { 355 mBigramCount--; 356 return true; 357 } else { 358 return false; 359 } 360} 361 362bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { 363 if (!mBuffers->isUpdatable()) { 364 AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); 365 return false; 366 } 367 if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { 368 AKLOGE("Cannot flush the dictionary to file."); 369 mIsCorrupted = true; 370 return false; 371 } 372 return true; 373} 374 375bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { 376 if (!mBuffers->isUpdatable()) { 377 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 378 return false; 379 } 380 if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { 381 AKLOGE("Cannot flush the dictionary to file with GC."); 382 mIsCorrupted = true; 383 return false; 384 } 385 return true; 386} 387 388bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { 389 if (!mBuffers->isUpdatable()) { 390 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 391 return false; 392 } 393 if (mBuffers->isNearSizeLimit()) { 394 // Additional buffer size is near the limit. 395 return true; 396 } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize() 397 > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) { 398 // Total extended region size of the trie exceeds the limit. 399 return true; 400 } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS 401 && mDictBuffer->getUsedAdditionalBufferSize() > 0) { 402 // Needs to reduce dictionary size. 403 return true; 404 } else if (mHeaderPolicy->isDecayingDict()) { 405 return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, 406 mHeaderPolicy); 407 } 408 return false; 409} 410 411void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength, 412 char *const outResult, const int maxResultLength) { 413 const int compareLength = queryLength + 1 /* terminator */; 414 if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { 415 snprintf(outResult, maxResultLength, "%d", mUnigramCount); 416 } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { 417 snprintf(outResult, maxResultLength, "%d", mBigramCount); 418 } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { 419 snprintf(outResult, maxResultLength, "%d", 420 mHeaderPolicy->isDecayingDict() ? 421 ForgettingCurveUtils::getUnigramCountHardLimit( 422 mHeaderPolicy->getMaxUnigramCount()) : 423 static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); 424 } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { 425 snprintf(outResult, maxResultLength, "%d", 426 mHeaderPolicy->isDecayingDict() ? 427 ForgettingCurveUtils::getBigramCountHardLimit( 428 mHeaderPolicy->getMaxBigramCount()) : 429 static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); 430 } 431} 432 433const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints, 434 const int codePointCount) const { 435 const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, 436 false /* forceLowerCaseSearch */); 437 if (ptNodePos == NOT_A_DICT_POS) { 438 AKLOGE("getWordProperty is called for invalid word."); 439 return WordProperty(); 440 } 441 const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 442 std::vector<int> codePointVector(ptNodeParams.getCodePoints(), 443 ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); 444 const ProbabilityEntry probabilityEntry = 445 mBuffers->getProbabilityDictContent()->getProbabilityEntry( 446 ptNodeParams.getTerminalId()); 447 const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); 448 // Fetch bigram information. 449 std::vector<BigramProperty> bigrams; 450 const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); 451 if (bigramListPos != NOT_A_DICT_POS) { 452 int bigramWord1CodePoints[MAX_WORD_LENGTH]; 453 const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent(); 454 const TerminalPositionLookupTable *const terminalPositionLookupTable = 455 mBuffers->getTerminalPositionLookupTable(); 456 bool hasNext = true; 457 int readingPos = bigramListPos; 458 while (hasNext) { 459 const BigramEntry bigramEntry = 460 bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 461 hasNext = bigramEntry.hasNext(); 462 const int word1TerminalId = bigramEntry.getTargetTerminalId(); 463 const int word1TerminalPtNodePos = 464 terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); 465 if (word1TerminalPtNodePos == NOT_A_DICT_POS) { 466 continue; 467 } 468 // Word (unigram) probability 469 int word1Probability = NOT_A_PROBABILITY; 470 const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 471 word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, 472 &word1Probability); 473 const std::vector<int> word1(bigramWord1CodePoints, 474 bigramWord1CodePoints + codePointCount); 475 const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); 476 const int probability = bigramEntry.hasHistoricalInfo() ? 477 ForgettingCurveUtils::decodeProbability( 478 bigramEntry.getHistoricalInfo(), mHeaderPolicy) : 479 bigramEntry.getProbability(); 480 bigrams.emplace_back(&word1, probability, 481 historicalInfo->getTimeStamp(), historicalInfo->getLevel(), 482 historicalInfo->getCount()); 483 } 484 } 485 // Fetch shortcut information. 486 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 487 int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); 488 if (shortcutPos != NOT_A_DICT_POS) { 489 int shortcutTarget[MAX_WORD_LENGTH]; 490 const ShortcutDictContent *const shortcutDictContent = 491 mBuffers->getShortcutDictContent(); 492 bool hasNext = true; 493 while (hasNext) { 494 int shortcutTargetLength = 0; 495 int shortcutProbability = NOT_A_PROBABILITY; 496 shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget, 497 &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos); 498 const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength); 499 shortcuts.emplace_back(&target, shortcutProbability); 500 } 501 } 502 const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), 503 ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), 504 historicalInfo->getTimeStamp(), historicalInfo->getLevel(), 505 historicalInfo->getCount(), &shortcuts); 506 return WordProperty(&codePointVector, &unigramProperty, &bigrams); 507} 508 509int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, 510 int *const outCodePointCount) { 511 *outCodePointCount = 0; 512 if (token == 0) { 513 mTerminalPtNodePositionsForIteratingWords.clear(); 514 DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( 515 &mTerminalPtNodePositionsForIteratingWords); 516 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 517 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 518 readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); 519 } 520 const int terminalPtNodePositionsVectorSize = 521 static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); 522 if (token < 0 || token >= terminalPtNodePositionsVectorSize) { 523 AKLOGE("Given token %d is invalid.", token); 524 return 0; 525 } 526 const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; 527 int unigramProbability = NOT_A_PROBABILITY; 528 *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 529 terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); 530 const int nextToken = token + 1; 531 if (nextToken >= terminalPtNodePositionsVectorSize) { 532 // All words have been iterated. 533 mTerminalPtNodePositionsForIteratingWords.clear(); 534 return 0; 535 } 536 return nextToken; 537} 538 539} // namespace v402 540} // namespace backward 541} // namespace latinime 542