126de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi/* 226de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project 326de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * 426de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License"); 526de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * you may not use this file except in compliance with the License. 626de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * You may obtain a copy of the License at 726de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * 826de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * http://www.apache.org/licenses/LICENSE-2.0 926de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * 1026de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software 1126de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS, 1226de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1326de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * See the License for the specific language governing permissions and 1426de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi * limitations under the License. 1526de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi */ 1626de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 179601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" 1826de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 1931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi#include <cstdio> 2031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi#include <cstring> 21fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi#include <ctime> 2231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 2326de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi#include "defines.h" 2426de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node.h" 2526de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi#include "suggest/core/dicnode/dic_node_vector.h" 262b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" 274d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" 282b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" 29f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" 302b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" 3113d5dc914aae5cb6bf6ef06aa05643514a40318cKeisuke Kuroyanagi#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 3265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/utils/probability_utils.h" 3326de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 3426de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynaginamespace latinime { 3526de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 36699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi// Note that these are corresponding definitions in Java side in BinaryDictionaryTests and 37699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi// BinaryDictionaryDecayingTests. 3831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagiconst char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 3931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagiconst char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 40cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagiconst char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 41cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagiconst char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 42699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagiconst char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY = 43699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi "SET_NEEDS_TO_DECAY_FOR_TESTING"; 44fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagiconst int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; 45fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagiconst int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = 46fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; 4731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 4826de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagivoid DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, 497fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi DicNodeVector *const childDicNodes) const { 502b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi if (!dicNode->hasChildren()) { 512b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi return; 522b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi } 533e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, 543e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 5577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos()); 564d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); 574d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi while (!readingHelper.isEnd()) { 586bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted(); 596bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi if (isTerminal && mHeaderPolicy.isDecayingDict()) { 606bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose 616bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a 626bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi // valid terminal DicNode. 636bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY) 646bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi != NOT_A_PROBABILITY; 656bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi } 668c69ddb53e05cf2740137a09dc139aed7a9831a5Keisuke Kuroyanagi childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), 676bc5acaa793e0311fcfa4a0f12c49ced6d792729Keisuke Kuroyanagi nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal, 684d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), 694d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); 704d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi readingHelper.readNextSiblingNode(); 714d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi } 7226de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} 7326de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 7426de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagiint DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 7577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, 7626de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi int *const outUnigramProbability) const { 779601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi // This method traverses parent nodes from the terminal by following parent pointers; thus, 789601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi // node code points are stored in the buffer in the reverse order. 799601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi int reverseCodePoints[maxCodePointCount]; 803e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, 813e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 824d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // First, read the terminal node and get its probability. 8377ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi readingHelper.initWithPtNodePos(ptNodePos); 844d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (!readingHelper.isValidTerminalNode()) { 8577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi // Node at the ptNodePos is not a valid terminal node. 864d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi *outUnigramProbability = NOT_A_PROBABILITY; 874d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi return 0; 889601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi } 894d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Store terminal node probability. 904d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi *outUnigramProbability = readingHelper.getNodeReader()->getProbability(); 914d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Then, following parent node link to the dictionary root and fetch node code points. 924d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi while (!readingHelper.isEnd()) { 934d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { 9477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi // The ptNodePos is not a valid terminal node position in the dictionary. 959601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi *outUnigramProbability = NOT_A_PROBABILITY; 969601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi return 0; 979601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi } 989601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi // Store node code points to buffer in the reverse order. 994d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi readingHelper.fetchMergedNodeCodePointsInReverseOrder( 1004d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); 1014d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Follow parent node toward the root node. 1024d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi readingHelper.readParentNode(); 1034d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi } 1044d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (readingHelper.isError()) { 1054d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // The node position or the dictionary is invalid. 1064d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi *outUnigramProbability = NOT_A_PROBABILITY; 1074d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi return 0; 1089601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi } 1099601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi // Reverse the stored code points to output them. 1104d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi const int codePointCount = readingHelper.getTotalCodePointCount(); 1119601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi for (int i = 0; i < codePointCount; ++i) { 1129601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi outCodePoints[i] = reverseCodePoints[codePointCount - i - 1]; 1139601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi } 1149601df5aac492b12ea0912ba6da8ab3d11e1f5d7Keisuke Kuroynagi return codePointCount; 11526de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} 11626de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 117e1ebef6124241ef51d5ed17884e6299a330d496bKeisuke Kuroyanagiint DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, 11826de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi const int length, const bool forceLowerCaseSearch) const { 119744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi int searchCodePoints[length]; 120744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi for (int i = 0; i < length; ++i) { 121744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; 122744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 1233e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, 1243e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 12577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(getRootPosition()); 1264d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); 1274d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi while (!readingHelper.isEnd()) { 1284d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); 1294d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (readingHelper.getTotalCodePointCount() > length 1304d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi || !readingHelper.isMatchedCodePoint(0 /* index */, 1314d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi searchCodePoints[matchedCodePointCount])) { 1324d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Current node has too many code points or its first code point is different from 1334d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // target code point. Skip this node and read the next sibling node. 1344d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi readingHelper.readNextSiblingNode(); 1354d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi continue; 1364d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi } 1374d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Check following merged node code points. 1384d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi const int nodeCodePointCount = nodeReader->getCodePointCount(); 1394d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi for (int j = 1; j < nodeCodePointCount; ++j) { 1404d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (!readingHelper.isMatchedCodePoint( 1414d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi j, searchCodePoints[matchedCodePointCount + j])) { 1424d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Different code point is found. The given word is not included in the dictionary. 143cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi return NOT_A_DICT_POS; 144744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 1454d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi } 1464d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // All characters are matched. 1474d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (length == readingHelper.getTotalCodePointCount()) { 1484d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Terminal position is found. 1498c69ddb53e05cf2740137a09dc139aed7a9831a5Keisuke Kuroyanagi return nodeReader->getHeadPos(); 1504d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi } 1514d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi if (!nodeReader->hasChildren()) { 152cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi return NOT_A_DICT_POS; 153744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 1544d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi // Advance to the children nodes. 1554d814bfcb76c6a7637aed0046079251dfdc08095Keisuke Kuroyanagi readingHelper.readChildNode(); 156744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 157744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi // If we already traversed the tree further than the word is long, there means 158744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi // there was no match (or we would have found it). 159cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi return NOT_A_DICT_POS; 16026de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} 16126de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 16265d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagiint DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, 16365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi const int bigramProbability) const { 164fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi if (mHeaderPolicy.isDecayingDict()) { 16513d5dc914aae5cb6bf6ef06aa05643514a40318cKeisuke Kuroyanagi return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); 16665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } else { 167fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi if (unigramProbability == NOT_A_PROBABILITY) { 168fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return NOT_A_PROBABILITY; 169fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } else if (bigramProbability == NOT_A_PROBABILITY) { 170fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return ProbabilityUtils::backoff(unigramProbability); 171fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } else { 172fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, 173fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi bigramProbability); 174fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } 17565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi } 17665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi} 17765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi 17877ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { 17977ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 180744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi return NOT_A_PROBABILITY; 181744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 1823e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, 1836c4d09e9e12d02aa87b27def6529220c93ff4588Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 18477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); 1852b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { 1862b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi return NOT_A_PROBABILITY; 1872b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi } 18865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); 18926de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} 19026de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 19177ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 19277ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 193744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi return NOT_A_DICT_POS; 194744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 1953e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, 1966c4d09e9e12d02aa87b27def6529220c93ff4588Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 19777ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); 1982b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi if (nodeReader.isDeleted()) { 1992b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi return NOT_A_DICT_POS; 2002b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi } 2012b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi return nodeReader.getShortcutPos(); 20226de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} 20326de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 20477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagiint DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 20577ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi if (ptNodePos == NOT_A_DICT_POS) { 206744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi return NOT_A_DICT_POS; 207744084defb769c4631bf0d4f9579036fd81a8af1Keisuke Kuroynagi } 2083e76487c6c95ccec49622b9d7e0b45efff97f937Keisuke Kuroyanagi DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, 2096c4d09e9e12d02aa87b27def6529220c93ff4588Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 21077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); 2112b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi if (nodeReader.isDeleted()) { 2122b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi return NOT_A_DICT_POS; 2132b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi } 2142b1dd6e2532ee041248c3f7c48f28d789713b18bKeisuke Kuroynagi return nodeReader.getBigramsPos(); 21526de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} 21626de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi 21766facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagibool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, 21866facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi const int probability) { 2190624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi if (!mBuffer->isUpdatable()) { 2200624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); 2210624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi return false; 2220624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi } 223fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi if (mBufferWithExtendableBuffer.getTailPosition() 224fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 225fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi AKLOGE("The dictionary is too large to dynamically update."); 226fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return false; 227fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } 228f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, 229f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi getBigramsStructurePolicy(), getShortcutsStructurePolicy()); 23077ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(getRootPosition()); 231f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, 232fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); 23331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi bool addedNewUnigram = false; 23431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, 23531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi &addedNewUnigram)) { 23631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi if (addedNewUnigram) { 23731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi mUnigramCount++; 23831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 23931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return true; 24031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } else { 24131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return false; 24231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 24366facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi} 24466facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi 24566facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagibool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, 24666facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi const int *const word1, const int length1, const int probability) { 2470624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi if (!mBuffer->isUpdatable()) { 2480624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); 2490624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi return false; 2500624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi } 251fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi if (mBufferWithExtendableBuffer.getTailPosition() 252fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 253fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi AKLOGE("The dictionary is too large to dynamically update."); 254fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return false; 255fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } 256f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi const int word0Pos = getTerminalNodePositionOfWord(word0, length0, 257f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi false /* forceLowerCaseSearch */); 258cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (word0Pos == NOT_A_DICT_POS) { 259f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi return false; 260f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi } 261f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi const int word1Pos = getTerminalNodePositionOfWord(word1, length1, 262f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi false /* forceLowerCaseSearch */); 263cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (word1Pos == NOT_A_DICT_POS) { 264f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi return false; 265f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi } 266f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, 267fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); 26831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi bool addedNewBigram = false; 26931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { 27031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi if (addedNewBigram) { 27131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi mBigramCount++; 27231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 27331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return true; 27431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } else { 27531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return false; 27631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 27766facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi} 27866facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi 27966facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagibool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, 28066facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi const int *const word1, const int length1) { 2810624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi if (!mBuffer->isUpdatable()) { 2820624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); 2830624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi return false; 2840624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi } 285fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi if (mBufferWithExtendableBuffer.getTailPosition() 286fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { 287fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi AKLOGE("The dictionary is too large to dynamically update."); 288fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return false; 289fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } 290f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi const int word0Pos = getTerminalNodePositionOfWord(word0, length0, 291f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi false /* forceLowerCaseSearch */); 292cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (word0Pos == NOT_A_DICT_POS) { 293f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi return false; 294f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi } 295f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi const int word1Pos = getTerminalNodePositionOfWord(word1, length1, 296f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi false /* forceLowerCaseSearch */); 297cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi if (word1Pos == NOT_A_DICT_POS) { 298f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi return false; 299f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi } 300f1cd7a081c1365da389e14bd190d7e15fa402eb8Keisuke Kuroyanagi DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, 301fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); 30231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { 30331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi mBigramCount--; 30431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return true; 30531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } else { 30631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi return false; 30731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 30866facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi} 30966facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi 310d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid DynamicPatriciaTriePolicy::flush(const char *const filePath) { 311d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi if (!mBuffer->isUpdatable()) { 312d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi AKLOGI("Warning: flush() is called for non-updatable dictionary."); 313d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi return; 314d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi } 31515605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, 316699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); 31731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); 318d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 319d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 320d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { 321d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi if (!mBuffer->isUpdatable()) { 322d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 323d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi return; 324d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi } 32567c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi const bool needsToDecay = mHeaderPolicy.isDecayingDict() 32667c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( 32767c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); 32867c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, 32967c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi &mShortcutListPolicy, needsToDecay); 3302cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, 33167c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); 3322cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); 33367c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi mNeedsToDecayForTesting = false; 334d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 335d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 336c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagibool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { 337d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi if (!mBuffer->isUpdatable()) { 338d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 339d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi return false; 340d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi } 341fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi if (mBufferWithExtendableBuffer.isNearSizeLimit()) { 342fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi // Additional buffer size is near the limit. 343fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return true; 344fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } else if (mHeaderPolicy.getExtendedRegionSize() 345fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() 346fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi > MAX_DICT_EXTENDED_REGION_SIZE) { 347fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi // Total extended region size exceeds the limit. 348fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return true; 349fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } else if (mBufferWithExtendableBuffer.getTailPosition() 350fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS 351fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) { 352fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi // Needs to reduce dictionary size. 353fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return true; 354fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } else if (mHeaderPolicy.isDecayingDict()) { 35567c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( 35667c855ea6f882190d73df9d3fae0b56929fd6888Keisuke Kuroyanagi mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy); 357fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi } 358fd02b2d6ee55d4aee7faab89a7a2b72764eafc47Keisuke Kuroyanagi return false; 359d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi} 360d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi 36131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagivoid DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, 362699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi const int maxResultLength) { 36331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { 36431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi snprintf(outResult, maxResultLength, "%d", mUnigramCount); 36531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { 36631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi snprintf(outResult, maxResultLength, "%d", mBigramCount); 367cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagi } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { 368cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagi snprintf(outResult, maxResultLength, "%d", 369cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagi mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT : 3706d91e4ce741b71589000374de47f50887392b982Keisuke Kuroyanagi static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE)); 371cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagi } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) { 372cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagi snprintf(outResult, maxResultLength, "%d", 373cfb018ba6db78f2b33b54d4811f0bf166db29792Keisuke Kuroyanagi mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT : 3746d91e4ce741b71589000374de47f50887392b982Keisuke Kuroyanagi static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE)); 375699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) { 376699531099630edd8416e309c914187c285af4c44Keisuke Kuroyanagi mNeedsToDecayForTesting = true; 37731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 37831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi} 37931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 38026de7079b6bbaac1636445ef730c0229bc1add98Keisuke Kuroynagi} // namespace latinime 381