ver4_bigram_list_policy.cpp revision 198be3a6c5c53e63de5ed3a6a1ce618ca36ff98c
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" 18 19#include "suggest/core/dictionary/property/bigram_property.h" 20#include "suggest/policyimpl/dictionary/header/header_policy.h" 21#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" 22#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" 23#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" 24#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" 25#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 26 27namespace latinime { 28 29void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, 30 bool *const outHasNext, int *const bigramEntryPos) const { 31 const BigramEntry bigramEntry = 32 mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); 33 if (outBigramPos) { 34 // Lookup target PtNode position. 35 *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition( 36 bigramEntry.getTargetTerminalId()); 37 } 38 if (outProbability) { 39 if (bigramEntry.hasHistoricalInfo()) { 40 *outProbability = 41 ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(), 42 mHeaderPolicy); 43 } else { 44 *outProbability = bigramEntry.getProbability(); 45 } 46 } 47 if (outHasNext) { 48 *outHasNext = bigramEntry.hasNext(); 49 } 50} 51 52bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, 53 const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { 54 // 1. The word has no bigrams yet. 55 // 2. The word has bigrams, and there is the target in the list. 56 // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. 57 // 4. The word has bigrams. We have to append new bigram entry to the list. 58 // 5. Same as 4, but the list is the last entry of the content file. 59 if (outAddedNewEntry) { 60 *outAddedNewEntry = false; 61 } 62 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 63 if (bigramListPos == NOT_A_DICT_POS) { 64 // Case 1. PtNode that doesn't have a bigram list. 65 // Create new bigram list. 66 if (!mBigramDictContent->createNewBigramList(terminalId)) { 67 return false; 68 } 69 const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, 70 newTargetTerminalId); 71 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, 72 bigramProperty); 73 // Write an entry. 74 int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); 75 if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, 76 &writingPos)) { 77 AKLOGE("Cannot write bigram entry. pos: %d.", writingPos); 78 return false; 79 } 80 if (!mBigramDictContent->writeTerminator(writingPos)) { 81 AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos); 82 return false; 83 } 84 if (outAddedNewEntry) { 85 *outAddedNewEntry = true; 86 } 87 return true; 88 } 89 90 int tailEntryPos = NOT_A_DICT_POS; 91 const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, 92 &tailEntryPos); 93 if (entryPosToUpdate == NOT_A_DICT_POS) { 94 // Case 4, 5. Add new entry to the bigram list. 95 const int contentTailPos = mBigramDictContent->getContentTailPos(); 96 // If the tail entry is at the tail of content buffer, the new entry can be written without 97 // link (Case 5). 98 const bool canAppendEntry = 99 contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize(); 100 const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos; 101 int writingPos = newEntryPos; 102 // Write new entry at the tail position of the bigram content. 103 const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, 104 newTargetTerminalId); 105 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( 106 &newBigramEntry, bigramProperty); 107 if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, 108 &writingPos)) { 109 AKLOGE("Cannot write bigram entry. pos: %d.", writingPos); 110 return false; 111 } 112 if (!mBigramDictContent->writeTerminator(writingPos)) { 113 AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos); 114 return false; 115 } 116 if (!canAppendEntry) { 117 // Update link of the current tail entry. 118 if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) { 119 AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.", 120 tailEntryPos, newEntryPos); 121 return false; 122 } 123 } 124 if (outAddedNewEntry) { 125 *outAddedNewEntry = true; 126 } 127 return true; 128 } 129 130 // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry. 131 const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); 132 if (!originalBigramEntry.isValid()) { 133 // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing 134 // entry is updated. 135 if (outAddedNewEntry) { 136 *outAddedNewEntry = true; 137 } 138 } 139 const BigramEntry updatedBigramEntry = 140 originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); 141 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( 142 &updatedBigramEntry, bigramProperty); 143 return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); 144} 145 146bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { 147 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 148 if (bigramListPos == NOT_A_DICT_POS) { 149 // Bigram list doesn't exist. 150 return false; 151 } 152 const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos, 153 nullptr /* outTailEntryPos */); 154 if (entryPosToUpdate == NOT_A_DICT_POS) { 155 // Bigram entry doesn't exist. 156 return false; 157 } 158 const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); 159 if (targetTerminalId != bigramEntry.getTargetTerminalId()) { 160 // Bigram entry doesn't exist. 161 return false; 162 } 163 // Remove bigram entry by marking it as invalid entry and overwriting the original entry. 164 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); 165 return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate); 166} 167 168bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, 169 int *const outBigramCount) { 170 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 171 if (bigramListPos == NOT_A_DICT_POS) { 172 // Bigram list doesn't exist. 173 return true; 174 } 175 bool hasNext = true; 176 int readingPos = bigramListPos; 177 while (hasNext) { 178 const BigramEntry bigramEntry = 179 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 180 const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize(); 181 hasNext = bigramEntry.hasNext(); 182 if (!bigramEntry.isValid()) { 183 continue; 184 } 185 const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( 186 bigramEntry.getTargetTerminalId()); 187 if (targetPtNodePos == NOT_A_DICT_POS) { 188 // Invalidate bigram entry. 189 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); 190 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { 191 return false; 192 } 193 } else if (bigramEntry.hasHistoricalInfo()) { 194 const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( 195 bigramEntry.getHistoricalInfo(), mHeaderPolicy); 196 if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) { 197 const BigramEntry updatedBigramEntry = 198 bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); 199 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { 200 return false; 201 } 202 *outBigramCount += 1; 203 } else { 204 // Remove entry. 205 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); 206 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { 207 return false; 208 } 209 } 210 } else { 211 *outBigramCount += 1; 212 } 213 } 214 return true; 215} 216 217int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { 218 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 219 if (bigramListPos == NOT_A_DICT_POS) { 220 // Bigram list doesn't exist. 221 return 0; 222 } 223 int bigramCount = 0; 224 bool hasNext = true; 225 int readingPos = bigramListPos; 226 while (hasNext) { 227 const BigramEntry bigramEntry = 228 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 229 hasNext = bigramEntry.hasNext(); 230 if (bigramEntry.isValid()) { 231 bigramCount++; 232 } 233 } 234 return bigramCount; 235} 236 237int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, 238 const int bigramListPos, int *const outTailEntryPos) const { 239 if (outTailEntryPos) { 240 *outTailEntryPos = NOT_A_DICT_POS; 241 } 242 int invalidEntryPos = NOT_A_DICT_POS; 243 int readingPos = bigramListPos; 244 while (true) { 245 const BigramEntry bigramEntry = 246 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 247 const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize(); 248 if (!bigramEntry.hasNext()) { 249 if (outTailEntryPos) { 250 *outTailEntryPos = entryPos; 251 } 252 break; 253 } 254 if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { 255 // Entry with same target is found. 256 return entryPos; 257 } else if (!bigramEntry.isValid()) { 258 // Invalid entry that can be reused is found. 259 invalidEntryPos = entryPos; 260 } 261 } 262 return invalidEntryPos; 263} 264 265const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( 266 const BigramEntry *const originalBigramEntry, 267 const BigramProperty *const bigramProperty) const { 268 // TODO: Consolidate historical info and probability. 269 if (mHeaderPolicy->hasHistoricalInfoOfWords()) { 270 const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(), 271 bigramProperty->getLevel(), bigramProperty->getCount()); 272 const HistoricalInfo updatedHistoricalInfo = 273 ForgettingCurveUtils::createUpdatedHistoricalInfo( 274 originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(), 275 &historicalInfoForUpdate, mHeaderPolicy); 276 return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); 277 } else { 278 return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability()); 279 } 280} 281 282} // namespace latinime 283