ver4_patricia_trie_policy.cpp revision 0e6a1d1020c537d847ac4222cf621dea9db4311e
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! 19 * Do not edit this file other than updating policy's interface. 20 * 21 * This file was generated from 22 * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 23 */ 24 25#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h" 26 27#include <vector> 28 29#include "suggest/core/dicnode/dic_node.h" 30#include "suggest/core/dicnode/dic_node_vector.h" 31#include "suggest/core/dictionary/ngram_listener.h" 32#include "suggest/core/dictionary/property/bigram_property.h" 33#include "suggest/core/dictionary/property/unigram_property.h" 34#include "suggest/core/dictionary/property/word_property.h" 35#include "suggest/core/session/prev_words_info.h" 36#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" 37#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" 38#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 39#include "suggest/policyimpl/dictionary/utils/probability_utils.h" 40 41namespace latinime { 42namespace backward { 43namespace v402 { 44 45// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and 46// BinaryDictionaryDecayingTests. 47const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 48const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 49const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 50const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 51const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; 52const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = 53 Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; 54 55void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, 56 DicNodeVector *const childDicNodes) const { 57 if (!dicNode->hasChildren()) { 58 return; 59 } 60 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 61 readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); 62 while (!readingHelper.isEnd()) { 63 const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); 64 if (!ptNodeParams.isValid()) { 65 break; 66 } 67 bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); 68 if (isTerminal && mHeaderPolicy->isDecayingDict()) { 69 // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose 70 // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a 71 // valid terminal DicNode. 72 isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; 73 } 74 readingHelper.readNextSiblingNode(ptNodeParams); 75 if (ptNodeParams.representsNonWordInfo()) { 76 // Skip PtNodes that represent non-word information. 77 continue; 78 } 79 childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), 80 ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, 81 ptNodeParams.hasChildren(), 82 ptNodeParams.isBlacklisted() 83 || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, 84 ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); 85 } 86 if (readingHelper.isError()) { 87 mIsCorrupted = true; 88 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 89 } 90} 91 92int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 93 const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, 94 int *const outUnigramProbability) const { 95 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 96 readingHelper.initWithPtNodePos(ptNodePos); 97 const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( 98 maxCodePointCount, outCodePoints, outUnigramProbability); 99 if (readingHelper.isError()) { 100 mIsCorrupted = true; 101 AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount()."); 102 } 103 return codePointCount; 104} 105 106int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, 107 const int length, const bool forceLowerCaseSearch) const { 108 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 109 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 110 const int ptNodePos = 111 readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); 112 if (readingHelper.isError()) { 113 mIsCorrupted = true; 114 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 115 } 116 return ptNodePos; 117} 118 119int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, 120 const int bigramProbability) const { 121 if (mHeaderPolicy->isDecayingDict()) { 122 // Both probabilities are encoded. Decode them and get probability. 123 return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); 124 } else { 125 if (unigramProbability == NOT_A_PROBABILITY) { 126 return NOT_A_PROBABILITY; 127 } else if (bigramProbability == NOT_A_PROBABILITY) { 128 return ProbabilityUtils::backoff(unigramProbability); 129 } else { 130 return bigramProbability; 131 } 132 } 133} 134 135int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos, 136 const int ptNodePos) const { 137 if (ptNodePos == NOT_A_DICT_POS) { 138 return NOT_A_PROBABILITY; 139 } 140 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 141 if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { 142 return NOT_A_PROBABILITY; 143 } 144 if (prevWordsPtNodePos) { 145 const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); 146 BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); 147 while (bigramsIt.hasNext()) { 148 bigramsIt.next(); 149 if (bigramsIt.getBigramPos() == ptNodePos 150 && bigramsIt.getProbability() != NOT_A_PROBABILITY) { 151 return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); 152 } 153 } 154 return NOT_A_PROBABILITY; 155 } 156 return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); 157} 158 159void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos, 160 NgramListener *const listener) const { 161 if (!prevWordsPtNodePos) { 162 return; 163 } 164 const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); 165 BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); 166 while (bigramsIt.hasNext()) { 167 bigramsIt.next(); 168 listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); 169 } 170} 171 172int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 173 if (ptNodePos == NOT_A_DICT_POS) { 174 return NOT_A_DICT_POS; 175 } 176 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 177 if (ptNodeParams.isDeleted()) { 178 return NOT_A_DICT_POS; 179 } 180 return mBuffers->getShortcutDictContent()->getShortcutListHeadPos( 181 ptNodeParams.getTerminalId()); 182} 183 184int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 185 if (ptNodePos == NOT_A_DICT_POS) { 186 return NOT_A_DICT_POS; 187 } 188 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 189 if (ptNodeParams.isDeleted()) { 190 return NOT_A_DICT_POS; 191 } 192 return mBuffers->getBigramDictContent()->getBigramListHeadPos( 193 ptNodeParams.getTerminalId()); 194} 195 196bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, 197 const UnigramProperty *const unigramProperty) { 198 if (!mBuffers->isUpdatable()) { 199 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 200 return false; 201 } 202 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 203 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 204 mDictBuffer->getTailPosition()); 205 return false; 206 } 207 if (length > MAX_WORD_LENGTH) { 208 AKLOGE("The word is too long to insert to the dictionary, length: %d", length); 209 return false; 210 } 211 for (const auto &shortcut : unigramProperty->getShortcuts()) { 212 if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) { 213 AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d", 214 shortcut.getTargetCodePoints()->size()); 215 return false; 216 } 217 } 218 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 219 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 220 bool addedNewUnigram = false; 221 int codePointsToAdd[MAX_WORD_LENGTH]; 222 int codePointCountToAdd = length; 223 memmove(codePointsToAdd, word, sizeof(int) * length); 224 if (unigramProperty->representsBeginningOfSentence()) { 225 codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, 226 codePointCountToAdd, MAX_WORD_LENGTH); 227 } 228 if (codePointCountToAdd <= 0) { 229 return false; 230 } 231 if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd, 232 unigramProperty, &addedNewUnigram)) { 233 if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { 234 mUnigramCount++; 235 } 236 if (unigramProperty->getShortcuts().size() > 0) { 237 // Add shortcut target. 238 const int wordPos = getTerminalPtNodePositionOfWord(word, length, 239 false /* forceLowerCaseSearch */); 240 if (wordPos == NOT_A_DICT_POS) { 241 AKLOGE("Cannot find terminal PtNode position to add shortcut target."); 242 return false; 243 } 244 for (const auto &shortcut : unigramProperty->getShortcuts()) { 245 if (!mUpdatingHelper.addShortcutTarget(wordPos, 246 shortcut.getTargetCodePoints()->data(), 247 shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) { 248 AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, " 249 "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), 250 shortcut.getProbability()); 251 return false; 252 } 253 } 254 } 255 return true; 256 } else { 257 return false; 258 } 259} 260 261bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 262 const BigramProperty *const bigramProperty) { 263 if (!mBuffers->isUpdatable()) { 264 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 265 return false; 266 } 267 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 268 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 269 mDictBuffer->getTailPosition()); 270 return false; 271 } 272 if (!prevWordsInfo->isValid()) { 273 AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); 274 return false; 275 } 276 if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { 277 AKLOGE("The word is too long to insert the ngram to the dictionary. " 278 "length: %d", bigramProperty->getTargetCodePoints()->size()); 279 return false; 280 } 281 int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 282 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 283 false /* tryLowerCaseSearch */); 284 // TODO: Support N-gram. 285 if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { 286 if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { 287 const std::vector<UnigramProperty::ShortcutProperty> shortcuts; 288 const UnigramProperty beginningOfSentenceUnigramProperty( 289 true /* representsBeginningOfSentence */, true /* isNotAWord */, 290 false /* isBlacklisted */, MAX_PROBABILITY /* probability */, 291 NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); 292 if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), 293 prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */), 294 &beginningOfSentenceUnigramProperty)) { 295 AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); 296 return false; 297 } 298 // Refresh Terminal PtNode positions. 299 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 300 false /* tryLowerCaseSearch */); 301 } else { 302 return false; 303 } 304 } 305 const int word1Pos = getTerminalPtNodePositionOfWord( 306 bigramProperty->getTargetCodePoints()->data(), 307 bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */); 308 if (word1Pos == NOT_A_DICT_POS) { 309 return false; 310 } 311 bool addedNewBigram = false; 312 if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, 313 &addedNewBigram)) { 314 if (addedNewBigram) { 315 mBigramCount++; 316 } 317 return true; 318 } else { 319 return false; 320 } 321} 322 323bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, 324 const int *const word, const int length) { 325 if (!mBuffers->isUpdatable()) { 326 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 327 return false; 328 } 329 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 330 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 331 mDictBuffer->getTailPosition()); 332 return false; 333 } 334 if (!prevWordsInfo->isValid()) { 335 AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); 336 return false; 337 } 338 if (length > MAX_WORD_LENGTH) { 339 AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length); 340 } 341 int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 342 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 343 false /* tryLowerCaseSerch */); 344 // TODO: Support N-gram. 345 if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { 346 return false; 347 } 348 const int wordPos = getTerminalPtNodePositionOfWord(word, length, 349 false /* forceLowerCaseSearch */); 350 if (wordPos == NOT_A_DICT_POS) { 351 return false; 352 } 353 if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { 354 mBigramCount--; 355 return true; 356 } else { 357 return false; 358 } 359} 360 361bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { 362 if (!mBuffers->isUpdatable()) { 363 AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); 364 return false; 365 } 366 if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { 367 AKLOGE("Cannot flush the dictionary to file."); 368 mIsCorrupted = true; 369 return false; 370 } 371 return true; 372} 373 374bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { 375 if (!mBuffers->isUpdatable()) { 376 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 377 return false; 378 } 379 if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { 380 AKLOGE("Cannot flush the dictionary to file with GC."); 381 mIsCorrupted = true; 382 return false; 383 } 384 return true; 385} 386 387bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { 388 if (!mBuffers->isUpdatable()) { 389 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 390 return false; 391 } 392 if (mBuffers->isNearSizeLimit()) { 393 // Additional buffer size is near the limit. 394 return true; 395 } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize() 396 > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) { 397 // Total extended region size of the trie exceeds the limit. 398 return true; 399 } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS 400 && mDictBuffer->getUsedAdditionalBufferSize() > 0) { 401 // Needs to reduce dictionary size. 402 return true; 403 } else if (mHeaderPolicy->isDecayingDict()) { 404 return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, 405 mHeaderPolicy); 406 } 407 return false; 408} 409 410void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength, 411 char *const outResult, const int maxResultLength) { 412 const int compareLength = queryLength + 1 /* terminator */; 413 if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { 414 snprintf(outResult, maxResultLength, "%d", mUnigramCount); 415 } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { 416 snprintf(outResult, maxResultLength, "%d", mBigramCount); 417 } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { 418 snprintf(outResult, maxResultLength, "%d", 419 mHeaderPolicy->isDecayingDict() ? 420 ForgettingCurveUtils::getUnigramCountHardLimit( 421 mHeaderPolicy->getMaxUnigramCount()) : 422 static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); 423 } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { 424 snprintf(outResult, maxResultLength, "%d", 425 mHeaderPolicy->isDecayingDict() ? 426 ForgettingCurveUtils::getBigramCountHardLimit( 427 mHeaderPolicy->getMaxBigramCount()) : 428 static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); 429 } 430} 431 432const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints, 433 const int codePointCount) const { 434 const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, 435 false /* forceLowerCaseSearch */); 436 if (ptNodePos == NOT_A_DICT_POS) { 437 AKLOGE("getWordProperty is called for invalid word."); 438 return WordProperty(); 439 } 440 const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 441 std::vector<int> codePointVector(ptNodeParams.getCodePoints(), 442 ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); 443 const ProbabilityEntry probabilityEntry = 444 mBuffers->getProbabilityDictContent()->getProbabilityEntry( 445 ptNodeParams.getTerminalId()); 446 const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); 447 // Fetch bigram information. 448 std::vector<BigramProperty> bigrams; 449 const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); 450 if (bigramListPos != NOT_A_DICT_POS) { 451 int bigramWord1CodePoints[MAX_WORD_LENGTH]; 452 const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent(); 453 const TerminalPositionLookupTable *const terminalPositionLookupTable = 454 mBuffers->getTerminalPositionLookupTable(); 455 bool hasNext = true; 456 int readingPos = bigramListPos; 457 while (hasNext) { 458 const BigramEntry bigramEntry = 459 bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 460 hasNext = bigramEntry.hasNext(); 461 const int word1TerminalId = bigramEntry.getTargetTerminalId(); 462 const int word1TerminalPtNodePos = 463 terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); 464 if (word1TerminalPtNodePos == NOT_A_DICT_POS) { 465 continue; 466 } 467 // Word (unigram) probability 468 int word1Probability = NOT_A_PROBABILITY; 469 const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 470 word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, 471 &word1Probability); 472 const std::vector<int> word1(bigramWord1CodePoints, 473 bigramWord1CodePoints + codePointCount); 474 const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); 475 const int probability = bigramEntry.hasHistoricalInfo() ? 476 ForgettingCurveUtils::decodeProbability( 477 bigramEntry.getHistoricalInfo(), mHeaderPolicy) : 478 bigramEntry.getProbability(); 479 bigrams.emplace_back(&word1, probability, 480 historicalInfo->getTimeStamp(), historicalInfo->getLevel(), 481 historicalInfo->getCount()); 482 } 483 } 484 // Fetch shortcut information. 485 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 486 int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); 487 if (shortcutPos != NOT_A_DICT_POS) { 488 int shortcutTarget[MAX_WORD_LENGTH]; 489 const ShortcutDictContent *const shortcutDictContent = 490 mBuffers->getShortcutDictContent(); 491 bool hasNext = true; 492 while (hasNext) { 493 int shortcutTargetLength = 0; 494 int shortcutProbability = NOT_A_PROBABILITY; 495 shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget, 496 &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos); 497 const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength); 498 shortcuts.emplace_back(&target, shortcutProbability); 499 } 500 } 501 const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), 502 ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), 503 historicalInfo->getTimeStamp(), historicalInfo->getLevel(), 504 historicalInfo->getCount(), &shortcuts); 505 return WordProperty(&codePointVector, &unigramProperty, &bigrams); 506} 507 508int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, 509 int *const outCodePointCount) { 510 *outCodePointCount = 0; 511 if (token == 0) { 512 mTerminalPtNodePositionsForIteratingWords.clear(); 513 DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( 514 &mTerminalPtNodePositionsForIteratingWords); 515 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 516 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 517 readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); 518 } 519 const int terminalPtNodePositionsVectorSize = 520 static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); 521 if (token < 0 || token >= terminalPtNodePositionsVectorSize) { 522 AKLOGE("Given token %d is invalid.", token); 523 return 0; 524 } 525 const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; 526 int unigramProbability = NOT_A_PROBABILITY; 527 *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 528 terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); 529 const int nextToken = token + 1; 530 if (nextToken >= terminalPtNodePositionsVectorSize) { 531 // All words have been iterated. 532 mTerminalPtNodePositionsForIteratingWords.clear(); 533 return 0; 534 } 535 return nextToken; 536} 537 538} // namespace v402 539} // namespace backward 540} // namespace latinime 541