ver4_bigram_list_policy.cpp revision 198be3a6c5c53e63de5ed3a6a1ce618ca36ff98c
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
18
19#include "suggest/core/dictionary/property/bigram_property.h"
20#include "suggest/policyimpl/dictionary/header/header_policy.h"
21#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
22#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
23#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
24#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
25#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
26
27namespace latinime {
28
29void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
30        bool *const outHasNext, int *const bigramEntryPos) const {
31    const BigramEntry bigramEntry =
32            mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
33    if (outBigramPos) {
34        // Lookup target PtNode position.
35        *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
36                bigramEntry.getTargetTerminalId());
37    }
38    if (outProbability) {
39        if (bigramEntry.hasHistoricalInfo()) {
40            *outProbability =
41                    ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
42                            mHeaderPolicy);
43        } else {
44            *outProbability = bigramEntry.getProbability();
45        }
46    }
47    if (outHasNext) {
48        *outHasNext = bigramEntry.hasNext();
49    }
50}
51
52bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
53        const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
54    // 1. The word has no bigrams yet.
55    // 2. The word has bigrams, and there is the target in the list.
56    // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
57    // 4. The word has bigrams. We have to append new bigram entry to the list.
58    // 5. Same as 4, but the list is the last entry of the content file.
59    if (outAddedNewEntry) {
60        *outAddedNewEntry = false;
61    }
62    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
63    if (bigramListPos == NOT_A_DICT_POS) {
64        // Case 1. PtNode that doesn't have a bigram list.
65        // Create new bigram list.
66        if (!mBigramDictContent->createNewBigramList(terminalId)) {
67            return false;
68        }
69        const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
70                newTargetTerminalId);
71        const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
72                bigramProperty);
73        // Write an entry.
74        int writingPos =  mBigramDictContent->getBigramListHeadPos(terminalId);
75        if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
76                &writingPos)) {
77            AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
78            return false;
79        }
80        if (!mBigramDictContent->writeTerminator(writingPos)) {
81            AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
82            return false;
83        }
84        if (outAddedNewEntry) {
85            *outAddedNewEntry = true;
86        }
87        return true;
88    }
89
90    int tailEntryPos = NOT_A_DICT_POS;
91    const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
92            &tailEntryPos);
93    if (entryPosToUpdate == NOT_A_DICT_POS) {
94        // Case 4, 5. Add new entry to the bigram list.
95        const int contentTailPos = mBigramDictContent->getContentTailPos();
96        // If the tail entry is at the tail of content buffer, the new entry can be written without
97        // link (Case 5).
98        const bool canAppendEntry =
99                contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
100        const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
101        int writingPos = newEntryPos;
102        // Write new entry at the tail position of the bigram content.
103        const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
104                newTargetTerminalId);
105        const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
106                &newBigramEntry, bigramProperty);
107        if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
108                &writingPos)) {
109            AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
110            return false;
111        }
112        if (!mBigramDictContent->writeTerminator(writingPos)) {
113            AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
114            return false;
115        }
116        if (!canAppendEntry) {
117            // Update link of the current tail entry.
118            if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
119                AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
120                        tailEntryPos, newEntryPos);
121                return false;
122            }
123        }
124        if (outAddedNewEntry) {
125            *outAddedNewEntry = true;
126        }
127        return true;
128    }
129
130    // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
131    const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
132    if (!originalBigramEntry.isValid()) {
133        // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
134        // entry is updated.
135        if (outAddedNewEntry) {
136            *outAddedNewEntry = true;
137        }
138    }
139    const BigramEntry updatedBigramEntry =
140            originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
141    const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
142            &updatedBigramEntry, bigramProperty);
143    return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
144}
145
146bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
147    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
148    if (bigramListPos == NOT_A_DICT_POS) {
149        // Bigram list doesn't exist.
150        return false;
151    }
152    const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
153            nullptr /* outTailEntryPos */);
154    if (entryPosToUpdate == NOT_A_DICT_POS) {
155        // Bigram entry doesn't exist.
156        return false;
157    }
158    const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
159    if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
160        // Bigram entry doesn't exist.
161        return false;
162    }
163    // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
164    const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
165    return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
166}
167
168bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
169        int *const outBigramCount) {
170    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
171    if (bigramListPos == NOT_A_DICT_POS) {
172        // Bigram list doesn't exist.
173        return true;
174    }
175    bool hasNext = true;
176    int readingPos = bigramListPos;
177    while (hasNext) {
178        const BigramEntry bigramEntry =
179                mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
180        const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
181        hasNext = bigramEntry.hasNext();
182        if (!bigramEntry.isValid()) {
183            continue;
184        }
185        const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
186                bigramEntry.getTargetTerminalId());
187        if (targetPtNodePos == NOT_A_DICT_POS) {
188            // Invalidate bigram entry.
189            const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
190            if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
191                return false;
192            }
193        } else if (bigramEntry.hasHistoricalInfo()) {
194            const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
195                    bigramEntry.getHistoricalInfo(), mHeaderPolicy);
196            if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
197                const BigramEntry updatedBigramEntry =
198                        bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
199                if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
200                    return false;
201                }
202                *outBigramCount += 1;
203            } else {
204                // Remove entry.
205                const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
206                if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
207                    return false;
208                }
209            }
210        } else {
211            *outBigramCount += 1;
212        }
213    }
214    return true;
215}
216
217int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
218    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
219    if (bigramListPos == NOT_A_DICT_POS) {
220        // Bigram list doesn't exist.
221        return 0;
222    }
223    int bigramCount = 0;
224    bool hasNext = true;
225    int readingPos = bigramListPos;
226    while (hasNext) {
227        const BigramEntry bigramEntry =
228                mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
229        hasNext = bigramEntry.hasNext();
230        if (bigramEntry.isValid()) {
231            bigramCount++;
232        }
233    }
234    return bigramCount;
235}
236
237int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
238        const int bigramListPos, int *const outTailEntryPos) const {
239    if (outTailEntryPos) {
240        *outTailEntryPos = NOT_A_DICT_POS;
241    }
242    int invalidEntryPos = NOT_A_DICT_POS;
243    int readingPos = bigramListPos;
244    while (true) {
245        const BigramEntry bigramEntry =
246                mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
247        const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
248        if (!bigramEntry.hasNext()) {
249            if (outTailEntryPos) {
250                *outTailEntryPos = entryPos;
251            }
252            break;
253        }
254        if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
255            // Entry with same target is found.
256            return entryPos;
257        } else if (!bigramEntry.isValid()) {
258            // Invalid entry that can be reused is found.
259            invalidEntryPos = entryPos;
260        }
261    }
262    return invalidEntryPos;
263}
264
265const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
266        const BigramEntry *const originalBigramEntry,
267        const BigramProperty *const bigramProperty) const {
268    // TODO: Consolidate historical info and probability.
269    if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
270        const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
271                bigramProperty->getLevel(), bigramProperty->getCount());
272        const HistoricalInfo updatedHistoricalInfo =
273                ForgettingCurveUtils::createUpdatedHistoricalInfo(
274                        originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
275                        &historicalInfoForUpdate, mHeaderPolicy);
276        return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
277    } else {
278        return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
279    }
280}
281
282} // namespace latinime
283