1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
18
19#include <cstring>
20#include <queue>
21
22#include "dictionary/header/header_policy.h"
23#include "dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
24#include "dictionary/structure/v4/ver4_dict_buffers.h"
25#include "dictionary/structure/v4/ver4_dict_constants.h"
26#include "dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
27#include "dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
28#include "dictionary/structure/v4/ver4_pt_node_array_reader.h"
29#include "dictionary/utils/buffer_with_extendable_buffer.h"
30#include "dictionary/utils/file_utils.h"
31#include "dictionary/utils/forgetting_curve_utils.h"
32#include "utils/ngram_utils.h"
33
34namespace latinime {
35
36bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
37        const EntryCounts &entryCounts) const {
38    const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
39    BufferWithExtendableBuffer headerBuffer(
40            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
41    const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
42            + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
43    if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
44            entryCounts, extendedRegionSize, &headerBuffer)) {
45        AKLOGE("Cannot write header structure to buffer. "
46                "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, trigramCount: %d,"
47                "extendedRegionSize: %d", false, entryCounts.getNgramCount(NgramType::Unigram),
48                entryCounts.getNgramCount(NgramType::Bigram),
49                entryCounts.getNgramCount(NgramType::Trigram),
50                extendedRegionSize);
51        return false;
52    }
53    return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
54}
55
56bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
57        const char *const dictDirPath) {
58    const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
59    Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
60            Ver4DictBuffers::createVer4DictBuffers(headerPolicy,
61                    Ver4DictConstants::MAX_DICTIONARY_SIZE));
62    MutableEntryCounters entryCounters;
63    if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &entryCounters)) {
64        return false;
65    }
66    BufferWithExtendableBuffer headerBuffer(
67            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
68    if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
69            entryCounters.getEntryCounts(), 0 /* extendedRegionSize */, &headerBuffer)) {
70        return false;
71    }
72    return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
73}
74
75bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
76        const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
77        MutableEntryCounters *const outEntryCounters) {
78    Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer());
79    Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
80    Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
81            mBuffers->getTerminalPositionLookupTable());
82    Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
83            mBuffers, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
84
85    if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntriesForGC(
86            headerPolicy, outEntryCounters)) {
87        AKLOGE("Failed to update probabilities in language model dict content.");
88        return false;
89    }
90    if (headerPolicy->isDecayingDict()) {
91        const EntryCounts &maxEntryCounts = headerPolicy->getMaxNgramCounts();
92        if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries(
93                outEntryCounters->getEntryCounts(), maxEntryCounts, headerPolicy,
94                outEntryCounters)) {
95            AKLOGE("Failed to truncate entries in language model dict content.");
96            return false;
97        }
98    }
99
100    DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
101    readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
102    DynamicPtGcEventListeners
103            ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
104                    traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
105                            &ptNodeWriter);
106    if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
107            &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
108        return false;
109    }
110
111    // Mapping from positions in mBuffer to positions in bufferToWrite.
112    PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
113    readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
114    Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
115            buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
116    DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
117            traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
118                    buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
119    if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
120            &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
121        return false;
122    }
123
124    // Create policy instances for the GCed dictionary.
125    Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer());
126    Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
127    Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
128            buffersToWrite->getTerminalPositionLookupTable());
129    Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
130            buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader,
131            &newShortcutPolicy);
132    // Re-assign terminal IDs for valid terminal PtNodes.
133    TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
134    if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
135            &terminalIdMap)) {
136        return false;
137    }
138    // Run GC for language model dict content.
139    if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
140            mBuffers->getLanguageModelDictContent())) {
141        return false;
142    }
143    // Run GC for shortcut dict content.
144    if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
145            mBuffers->getShortcutDictContent())) {
146        return false;
147    }
148    DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
149    newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
150    DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
151            traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
152    if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
153            &traversePolicyToUpdateAllPositionFields)) {
154        return false;
155    }
156    newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
157    TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
158            traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
159    if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
160            &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
161        return false;
162    }
163    return true;
164}
165
166bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
167        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
168    if (!ptNodeParams->isTerminal()) {
169        return true;
170    }
171    TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
172            mTerminalIdMap->find(ptNodeParams->getTerminalId());
173    if (it == mTerminalIdMap->end()) {
174        AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
175                ptNodeParams->getTerminalId(), mTerminalIdMap->size());
176        return false;
177    }
178    if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
179        AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
180        return false;
181    }
182    return true;
183}
184
185} // namespace latinime
186