ver4_patricia_trie_policy.cpp revision 96d47fe7457ff1dbea4696a5e0edec2801610d47
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! 19 * Do not edit this file other than updating policy's interface. 20 * 21 * This file was generated from 22 * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 23 */ 24 25#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h" 26 27#include <vector> 28 29#include "suggest/core/dicnode/dic_node.h" 30#include "suggest/core/dicnode/dic_node_vector.h" 31#include "suggest/core/dictionary/property/bigram_property.h" 32#include "suggest/core/dictionary/property/unigram_property.h" 33#include "suggest/core/dictionary/property/word_property.h" 34#include "suggest/core/session/prev_words_info.h" 35#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" 36#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" 37#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 38#include "suggest/policyimpl/dictionary/utils/probability_utils.h" 39 40namespace latinime { 41namespace backward { 42namespace v402 { 43 44// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and 45// BinaryDictionaryDecayingTests. 46const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 47const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 48const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 49const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 50const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; 51const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = 52 Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; 53 54void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, 55 DicNodeVector *const childDicNodes) const { 56 if (!dicNode->hasChildren()) { 57 return; 58 } 59 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 60 readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); 61 while (!readingHelper.isEnd()) { 62 const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); 63 if (!ptNodeParams.isValid()) { 64 break; 65 } 66 bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); 67 if (isTerminal && mHeaderPolicy->isDecayingDict()) { 68 // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose 69 // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a 70 // valid terminal DicNode. 71 isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; 72 } 73 readingHelper.readNextSiblingNode(ptNodeParams); 74 if (ptNodeParams.representsNonWordInfo()) { 75 // Skip PtNodes that represent non-word information. 76 continue; 77 } 78 childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), 79 ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, 80 ptNodeParams.hasChildren(), 81 ptNodeParams.isBlacklisted() 82 || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, 83 ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); 84 } 85 if (readingHelper.isError()) { 86 mIsCorrupted = true; 87 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 88 } 89} 90 91int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 92 const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, 93 int *const outUnigramProbability) const { 94 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 95 readingHelper.initWithPtNodePos(ptNodePos); 96 const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( 97 maxCodePointCount, outCodePoints, outUnigramProbability); 98 if (readingHelper.isError()) { 99 mIsCorrupted = true; 100 AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount()."); 101 } 102 return codePointCount; 103} 104 105int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, 106 const int length, const bool forceLowerCaseSearch) const { 107 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 108 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 109 const int ptNodePos = 110 readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); 111 if (readingHelper.isError()) { 112 mIsCorrupted = true; 113 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 114 } 115 return ptNodePos; 116} 117 118int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, 119 const int bigramProbability) const { 120 if (mHeaderPolicy->isDecayingDict()) { 121 // Both probabilities are encoded. Decode them and get probability. 122 return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); 123 } else { 124 if (unigramProbability == NOT_A_PROBABILITY) { 125 return NOT_A_PROBABILITY; 126 } else if (bigramProbability == NOT_A_PROBABILITY) { 127 return ProbabilityUtils::backoff(unigramProbability); 128 } else { 129 return bigramProbability; 130 } 131 } 132} 133 134int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, 135 const int ptNodePos) const { 136 if (ptNodePos == NOT_A_DICT_POS) { 137 return NOT_A_PROBABILITY; 138 } 139 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 140 if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { 141 return NOT_A_PROBABILITY; 142 } 143 return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); 144} 145 146int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 147 if (ptNodePos == NOT_A_DICT_POS) { 148 return NOT_A_DICT_POS; 149 } 150 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 151 if (ptNodeParams.isDeleted()) { 152 return NOT_A_DICT_POS; 153 } 154 return mBuffers->getShortcutDictContent()->getShortcutListHeadPos( 155 ptNodeParams.getTerminalId()); 156} 157 158BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( 159 const int ptNodePos) const { 160 const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); 161 return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); 162} 163 164int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 165 if (ptNodePos == NOT_A_DICT_POS) { 166 return NOT_A_DICT_POS; 167 } 168 const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); 169 if (ptNodeParams.isDeleted()) { 170 return NOT_A_DICT_POS; 171 } 172 return mBuffers->getBigramDictContent()->getBigramListHeadPos( 173 ptNodeParams.getTerminalId()); 174} 175 176bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, 177 const UnigramProperty *const unigramProperty) { 178 if (!mBuffers->isUpdatable()) { 179 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 180 return false; 181 } 182 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 183 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 184 mDictBuffer->getTailPosition()); 185 return false; 186 } 187 if (length > MAX_WORD_LENGTH) { 188 AKLOGE("The word is too long to insert to the dictionary, length: %d", length); 189 return false; 190 } 191 for (const auto &shortcut : unigramProperty->getShortcuts()) { 192 if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) { 193 AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d", 194 shortcut.getTargetCodePoints()->size()); 195 return false; 196 } 197 } 198 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 199 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 200 bool addedNewUnigram = false; 201 int codePointsToAdd[MAX_WORD_LENGTH]; 202 int codePointCountToAdd = length; 203 memmove(codePointsToAdd, word, sizeof(int) * length); 204 if (unigramProperty->representsBeginningOfSentence()) { 205 codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, 206 codePointCountToAdd, MAX_WORD_LENGTH); 207 } 208 if (codePointCountToAdd <= 0) { 209 return false; 210 } 211 if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd, 212 unigramProperty, &addedNewUnigram)) { 213 if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { 214 mUnigramCount++; 215 } 216 if (unigramProperty->getShortcuts().size() > 0) { 217 // Add shortcut target. 218 const int wordPos = getTerminalPtNodePositionOfWord(word, length, 219 false /* forceLowerCaseSearch */); 220 if (wordPos == NOT_A_DICT_POS) { 221 AKLOGE("Cannot find terminal PtNode position to add shortcut target."); 222 return false; 223 } 224 for (const auto &shortcut : unigramProperty->getShortcuts()) { 225 if (!mUpdatingHelper.addShortcutTarget(wordPos, 226 shortcut.getTargetCodePoints()->data(), 227 shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) { 228 AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, " 229 "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), 230 shortcut.getProbability()); 231 return false; 232 } 233 } 234 } 235 return true; 236 } else { 237 return false; 238 } 239} 240 241bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 242 const BigramProperty *const bigramProperty) { 243 if (!mBuffers->isUpdatable()) { 244 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 245 return false; 246 } 247 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 248 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 249 mDictBuffer->getTailPosition()); 250 return false; 251 } 252 if (!prevWordsInfo->isValid()) { 253 AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); 254 return false; 255 } 256 if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { 257 AKLOGE("The word is too long to insert the ngram to the dictionary. " 258 "length: %d", bigramProperty->getTargetCodePoints()->size()); 259 return false; 260 } 261 int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 262 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 263 false /* tryLowerCaseSearch */); 264 // TODO: Support N-gram. 265 if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { 266 if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { 267 const std::vector<UnigramProperty::ShortcutProperty> shortcuts; 268 const UnigramProperty beginningOfSentenceUnigramProperty( 269 true /* representsBeginningOfSentence */, true /* isNotAWord */, 270 false /* isBlacklisted */, MAX_PROBABILITY /* probability */, 271 NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); 272 if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), 273 prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */), 274 &beginningOfSentenceUnigramProperty)) { 275 AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); 276 return false; 277 } 278 // Refresh Terminal PtNode positions. 279 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 280 false /* tryLowerCaseSearch */); 281 } else { 282 return false; 283 } 284 } 285 const int word1Pos = getTerminalPtNodePositionOfWord( 286 bigramProperty->getTargetCodePoints()->data(), 287 bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */); 288 if (word1Pos == NOT_A_DICT_POS) { 289 return false; 290 } 291 bool addedNewBigram = false; 292 if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, 293 &addedNewBigram)) { 294 if (addedNewBigram) { 295 mBigramCount++; 296 } 297 return true; 298 } else { 299 return false; 300 } 301} 302 303bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, 304 const int *const word, const int length) { 305 if (!mBuffers->isUpdatable()) { 306 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 307 return false; 308 } 309 if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 310 AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", 311 mDictBuffer->getTailPosition()); 312 return false; 313 } 314 if (!prevWordsInfo->isValid()) { 315 AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); 316 return false; 317 } 318 if (length > MAX_WORD_LENGTH) { 319 AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length); 320 } 321 int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 322 prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, 323 false /* tryLowerCaseSerch */); 324 // TODO: Support N-gram. 325 if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { 326 return false; 327 } 328 const int wordPos = getTerminalPtNodePositionOfWord(word, length, 329 false /* forceLowerCaseSearch */); 330 if (wordPos == NOT_A_DICT_POS) { 331 return false; 332 } 333 if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { 334 mBigramCount--; 335 return true; 336 } else { 337 return false; 338 } 339} 340 341bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { 342 if (!mBuffers->isUpdatable()) { 343 AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); 344 return false; 345 } 346 if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { 347 AKLOGE("Cannot flush the dictionary to file."); 348 mIsCorrupted = true; 349 return false; 350 } 351 return true; 352} 353 354bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { 355 if (!mBuffers->isUpdatable()) { 356 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 357 return false; 358 } 359 if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { 360 AKLOGE("Cannot flush the dictionary to file with GC."); 361 mIsCorrupted = true; 362 return false; 363 } 364 return true; 365} 366 367bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { 368 if (!mBuffers->isUpdatable()) { 369 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 370 return false; 371 } 372 if (mBuffers->isNearSizeLimit()) { 373 // Additional buffer size is near the limit. 374 return true; 375 } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize() 376 > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) { 377 // Total extended region size of the trie exceeds the limit. 378 return true; 379 } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS 380 && mDictBuffer->getUsedAdditionalBufferSize() > 0) { 381 // Needs to reduce dictionary size. 382 return true; 383 } else if (mHeaderPolicy->isDecayingDict()) { 384 return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, 385 mHeaderPolicy); 386 } 387 return false; 388} 389 390void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength, 391 char *const outResult, const int maxResultLength) { 392 const int compareLength = queryLength + 1 /* terminator */; 393 if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { 394 snprintf(outResult, maxResultLength, "%d", mUnigramCount); 395 } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { 396 snprintf(outResult, maxResultLength, "%d", mBigramCount); 397 } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { 398 snprintf(outResult, maxResultLength, "%d", 399 mHeaderPolicy->isDecayingDict() ? 400 ForgettingCurveUtils::getUnigramCountHardLimit( 401 mHeaderPolicy->getMaxUnigramCount()) : 402 static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); 403 } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { 404 snprintf(outResult, maxResultLength, "%d", 405 mHeaderPolicy->isDecayingDict() ? 406 ForgettingCurveUtils::getBigramCountHardLimit( 407 mHeaderPolicy->getMaxBigramCount()) : 408 static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); 409 } 410} 411 412const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints, 413 const int codePointCount) const { 414 const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, 415 false /* forceLowerCaseSearch */); 416 if (ptNodePos == NOT_A_DICT_POS) { 417 AKLOGE("getWordProperty is called for invalid word."); 418 return WordProperty(); 419 } 420 const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 421 std::vector<int> codePointVector(ptNodeParams.getCodePoints(), 422 ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); 423 const ProbabilityEntry probabilityEntry = 424 mBuffers->getProbabilityDictContent()->getProbabilityEntry( 425 ptNodeParams.getTerminalId()); 426 const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); 427 // Fetch bigram information. 428 std::vector<BigramProperty> bigrams; 429 const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); 430 if (bigramListPos != NOT_A_DICT_POS) { 431 int bigramWord1CodePoints[MAX_WORD_LENGTH]; 432 const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent(); 433 const TerminalPositionLookupTable *const terminalPositionLookupTable = 434 mBuffers->getTerminalPositionLookupTable(); 435 bool hasNext = true; 436 int readingPos = bigramListPos; 437 while (hasNext) { 438 const BigramEntry bigramEntry = 439 bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 440 hasNext = bigramEntry.hasNext(); 441 const int word1TerminalId = bigramEntry.getTargetTerminalId(); 442 const int word1TerminalPtNodePos = 443 terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); 444 if (word1TerminalPtNodePos == NOT_A_DICT_POS) { 445 continue; 446 } 447 // Word (unigram) probability 448 int word1Probability = NOT_A_PROBABILITY; 449 const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 450 word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, 451 &word1Probability); 452 const std::vector<int> word1(bigramWord1CodePoints, 453 bigramWord1CodePoints + codePointCount); 454 const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); 455 const int probability = bigramEntry.hasHistoricalInfo() ? 456 ForgettingCurveUtils::decodeProbability( 457 bigramEntry.getHistoricalInfo(), mHeaderPolicy) : 458 bigramEntry.getProbability(); 459 bigrams.emplace_back(&word1, probability, 460 historicalInfo->getTimeStamp(), historicalInfo->getLevel(), 461 historicalInfo->getCount()); 462 } 463 } 464 // Fetch shortcut information. 465 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 466 int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); 467 if (shortcutPos != NOT_A_DICT_POS) { 468 int shortcutTarget[MAX_WORD_LENGTH]; 469 const ShortcutDictContent *const shortcutDictContent = 470 mBuffers->getShortcutDictContent(); 471 bool hasNext = true; 472 while (hasNext) { 473 int shortcutTargetLength = 0; 474 int shortcutProbability = NOT_A_PROBABILITY; 475 shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget, 476 &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos); 477 const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength); 478 shortcuts.emplace_back(&target, shortcutProbability); 479 } 480 } 481 const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), 482 ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), 483 historicalInfo->getTimeStamp(), historicalInfo->getLevel(), 484 historicalInfo->getCount(), &shortcuts); 485 return WordProperty(&codePointVector, &unigramProperty, &bigrams); 486} 487 488int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, 489 int *const outCodePointCount) { 490 *outCodePointCount = 0; 491 if (token == 0) { 492 mTerminalPtNodePositionsForIteratingWords.clear(); 493 DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( 494 &mTerminalPtNodePositionsForIteratingWords); 495 DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); 496 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 497 readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); 498 } 499 const int terminalPtNodePositionsVectorSize = 500 static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); 501 if (token < 0 || token >= terminalPtNodePositionsVectorSize) { 502 AKLOGE("Given token %d is invalid.", token); 503 return 0; 504 } 505 const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; 506 int unigramProbability = NOT_A_PROBABILITY; 507 *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 508 terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); 509 const int nextToken = token + 1; 510 if (nextToken >= terminalPtNodePositionsVectorSize) { 511 // All words have been iterated. 512 mTerminalPtNodePositionsForIteratingWords.clear(); 513 return 0; 514 } 515 return nextToken; 516} 517 518} // namespace v402 519} // namespace backward 520} // namespace latinime 521