ver4_patricia_trie_node_writer.cpp revision 9069d30043d5182dfd38465ad9bbc11ad73fab7c
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * !!!!! DO NOT EDIT THIS FILE !!!!!
19 *
20 * This file was generated from
21 *   suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
22 */
23
24#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
25
26#include "suggest/core/dictionary/property/unigram_property.h"
27#include "suggest/policyimpl/dictionary/header/header_policy.h"
28#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
29#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
30#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
31#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
32#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
33#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
34#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
35#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
36#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
37#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
38
39namespace latinime {
40namespace backward {
41namespace v402 {
42
43const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
44
45bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
46        const PtNodeParams *const toBeUpdatedPtNodeParams) {
47    int pos = toBeUpdatedPtNodeParams->getHeadPos();
48    const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
49    const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
50    if (usesAdditionalBuffer) {
51        pos -= mTrieBuffer->getOriginalBufferSize();
52    }
53    // Read original flags
54    const PatriciaTrieReadingUtils::NodeFlags originalFlags =
55            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
56    const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
57            DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
58                    true /* isDeleted */, false /* willBecomeNonTerminal */);
59    int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
60    // Update flags.
61    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
62            &writingPos)) {
63        return false;
64    }
65    if (toBeUpdatedPtNodeParams->isTerminal()) {
66        // The PtNode is a terminal. Delete entry from the terminal position lookup table.
67        return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
68                toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
69    } else {
70        return true;
71    }
72}
73
74bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
75        const PtNodeParams *const toBeUpdatedPtNodeParams,
76        const int movedPos, const int bigramLinkedNodePos) {
77    int pos = toBeUpdatedPtNodeParams->getHeadPos();
78    const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
79    const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
80    if (usesAdditionalBuffer) {
81        pos -= mTrieBuffer->getOriginalBufferSize();
82    }
83    // Read original flags
84    const PatriciaTrieReadingUtils::NodeFlags originalFlags =
85            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
86    const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
87            DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
88                    false /* isDeleted */, false /* willBecomeNonTerminal */);
89    int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
90    // Update flags.
91    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
92            &writingPos)) {
93        return false;
94    }
95    // Update moved position, which is stored in the parent offset field.
96    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
97            mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
98        return false;
99    }
100    if (toBeUpdatedPtNodeParams->hasChildren()) {
101        // Update children's parent position.
102        mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
103        while (!mReadingHelper.isEnd()) {
104            const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
105            int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
106                    + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
107            if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
108                    mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
109                    &parentOffsetFieldPos)) {
110                // Parent offset cannot be written because of a bug or a broken dictionary; thus,
111                // we give up to update dictionary.
112                return false;
113            }
114            mReadingHelper.readNextSiblingNode(childPtNodeParams);
115        }
116    }
117    return true;
118}
119
120bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
121        const PtNodeParams *const toBeUpdatedPtNodeParams) {
122    int pos = toBeUpdatedPtNodeParams->getHeadPos();
123    const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
124    const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
125    if (usesAdditionalBuffer) {
126        pos -= mTrieBuffer->getOriginalBufferSize();
127    }
128    // Read original flags
129    const PatriciaTrieReadingUtils::NodeFlags originalFlags =
130            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
131    const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
132            DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
133                    false /* isDeleted */, true /* willBecomeNonTerminal */);
134    if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
135            toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) {
136        AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
137                toBeUpdatedPtNodeParams->getTerminalId());
138        return false;
139    }
140    // Update flags.
141    int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
142    return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
143            &writingPos);
144}
145
146bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
147        const PtNodeParams *const toBeUpdatedPtNodeParams,
148        const UnigramProperty *const unigramProperty) {
149    // Update probability and historical information.
150    // TODO: Update other information in the unigram property.
151    if (!toBeUpdatedPtNodeParams->isTerminal()) {
152        return false;
153    }
154    const ProbabilityEntry originalProbabilityEntry =
155            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
156                    toBeUpdatedPtNodeParams->getTerminalId());
157    const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
158            unigramProperty);
159    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
160            toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
161}
162
163bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
164        const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
165    if (!toBeUpdatedPtNodeParams->isTerminal()) {
166        AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
167        return false;
168    }
169    const ProbabilityEntry originalProbabilityEntry =
170            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
171                    toBeUpdatedPtNodeParams->getTerminalId());
172    if (originalProbabilityEntry.hasHistoricalInfo()) {
173        const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
174                originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
175        const ProbabilityEntry probabilityEntry =
176                originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
177        if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
178                toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
179            AKLOGE("Cannot write updated probability entry. terminalId: %d",
180                    toBeUpdatedPtNodeParams->getTerminalId());
181            return false;
182        }
183        const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
184        if (!isValid) {
185            if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
186                AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
187                return false;
188            }
189        }
190        *outNeedsToKeepPtNode = isValid;
191    } else {
192        // No need to update probability.
193        *outNeedsToKeepPtNode = true;
194    }
195    return true;
196}
197
198bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
199        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
200    int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
201    return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
202            newChildrenPosition, &childrenPosFieldPos);
203}
204
205bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
206        const int newTerminalId) {
207    return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
208            toBeUpdatedPtNodeParams->getTerminalIdFieldPos());
209}
210
211bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
212        const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
213    return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
214            ptNodeWritingPos);
215}
216
217
218bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
219        const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
220        int *const ptNodeWritingPos) {
221    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
222    if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
223            ptNodeWritingPos)) {
224        return false;
225    }
226    // Write probability.
227    ProbabilityEntry newProbabilityEntry;
228    const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
229            &newProbabilityEntry, unigramProperty);
230    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
231            &probabilityEntryToWrite);
232}
233
234bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
235        const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
236    if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
237        AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
238                sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
239        return false;
240    }
241    const int ptNodePos =
242            mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
243    const PtNodeParams sourcePtNodeParams =
244            mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
245    if (!sourcePtNodeParams.hasBigrams()) {
246        // Update has bigrams flag.
247        return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
248                sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
249                sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
250                true /* hasBigrams */,
251                sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
252    }
253    return true;
254}
255
256bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
257        const int wordId) {
258    return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
259}
260
261bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
262            const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
263    return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
264            sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
265}
266
267bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
268        const PtNodeParams *const toBeUpdatedPtNodeParams,
269        const DictPositionRelocationMap *const dictPositionRelocationMap,
270        int *const outBigramEntryCount) {
271    int parentPos = toBeUpdatedPtNodeParams->getParentPos();
272    if (parentPos != NOT_A_DICT_POS) {
273        PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
274                dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
275        if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
276            parentPos = it->second;
277        }
278    }
279    int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
280            + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
281    // Write updated parent offset.
282    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
283            parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
284        return false;
285    }
286
287    // Updates children position.
288    int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
289    if (childrenPos != NOT_A_DICT_POS) {
290        PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
291                dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
292        if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
293            childrenPos = it->second;
294        }
295    }
296    if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
297        return false;
298    }
299
300    // Counts bigram entries.
301    if (outBigramEntryCount) {
302        *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
303                toBeUpdatedPtNodeParams->getTerminalId());
304    }
305    return true;
306}
307
308bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
309        const int *const targetCodePoints, const int targetCodePointCount,
310        const int shortcutProbability) {
311    if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
312            targetCodePoints, targetCodePointCount, shortcutProbability)) {
313        AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
314        return false;
315    }
316    if (!ptNodeParams->hasShortcutTargets()) {
317        // Update has shortcut targets flag.
318        return updatePtNodeFlags(ptNodeParams->getHeadPos(),
319                ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
320                ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
321                ptNodeParams->hasBigrams(),
322                ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
323    }
324    return true;
325}
326
327bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
328        const PtNodeParams *const ptNodeParams) {
329    const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos(
330            ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
331    const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
332            ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
333    return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
334            ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
335            hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
336}
337
338bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
339        const PtNodeParams *const ptNodeParams, int *const outTerminalId,
340        int *const ptNodeWritingPos) {
341    const int nodePos = *ptNodeWritingPos;
342    // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
343    // PtNode writing.
344    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
345            0 /* nodeFlags */, ptNodeWritingPos)) {
346        return false;
347    }
348    // Calculate a parent offset and write the offset.
349    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
350            ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
351        return false;
352    }
353    // Write code points
354    if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
355            ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
356        return false;
357    }
358    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
359    if (!ptNodeParams->willBecomeNonTerminal()) {
360        if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
361            terminalId = ptNodeParams->getTerminalId();
362        } else if (ptNodeParams->isTerminal()) {
363            // Write terminal information using a new terminal id.
364            // Get a new unused terminal id.
365            terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
366        }
367    }
368    const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
369    if (isTerminal) {
370        // Update the lookup table.
371        if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
372                terminalId, nodePos)) {
373            return false;
374        }
375        // Write terminal Id.
376        if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
377                Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
378            return false;
379        }
380        if (outTerminalId) {
381            *outTerminalId = terminalId;
382        }
383    }
384    // Write children position
385    if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
386            ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
387        return false;
388    }
389    return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
390            isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
391            ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
392}
393
394const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
395        const ProbabilityEntry *const originalProbabilityEntry,
396        const UnigramProperty *const unigramProperty) const {
397    // TODO: Consolidate historical info and probability.
398    if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
399        const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
400                unigramProperty->getLevel(), unigramProperty->getCount());
401        const HistoricalInfo updatedHistoricalInfo =
402                ForgettingCurveUtils::createUpdatedHistoricalInfo(
403                        originalProbabilityEntry->getHistoricalInfo(),
404                        unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy);
405        return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
406                &updatedHistoricalInfo);
407    } else {
408        return originalProbabilityEntry->createEntryWithUpdatedProbability(
409                unigramProperty->getProbability());
410    }
411}
412
413bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
414        const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
415        const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
416    // Create node flags and write them.
417    PatriciaTrieReadingUtils::NodeFlags nodeFlags =
418            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
419                    hasShortcutTargets, hasBigrams, hasMultipleChars,
420                    CHILDREN_POSITION_FIELD_SIZE);
421    if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
422        AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
423        return false;
424    }
425    return true;
426}
427
428} // namespace v402
429} // namespace backward
430} // namespace latinime
431