1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" 18 19#include <cstring> 20#include <queue> 21 22#include "dictionary/header/header_policy.h" 23#include "dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" 24#include "dictionary/structure/v4/ver4_dict_buffers.h" 25#include "dictionary/structure/v4/ver4_dict_constants.h" 26#include "dictionary/structure/v4/ver4_patricia_trie_node_reader.h" 27#include "dictionary/structure/v4/ver4_patricia_trie_node_writer.h" 28#include "dictionary/structure/v4/ver4_pt_node_array_reader.h" 29#include "dictionary/utils/buffer_with_extendable_buffer.h" 30#include "dictionary/utils/file_utils.h" 31#include "dictionary/utils/forgetting_curve_utils.h" 32#include "utils/ngram_utils.h" 33 34namespace latinime { 35 36bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, 37 const EntryCounts &entryCounts) const { 38 const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); 39 BufferWithExtendableBuffer headerBuffer( 40 BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); 41 const int extendedRegionSize = headerPolicy->getExtendedRegionSize() 42 + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); 43 if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */, 44 entryCounts, extendedRegionSize, &headerBuffer)) { 45 AKLOGE("Cannot write header structure to buffer. " 46 "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, trigramCount: %d," 47 "extendedRegionSize: %d", false, entryCounts.getNgramCount(NgramType::Unigram), 48 entryCounts.getNgramCount(NgramType::Bigram), 49 entryCounts.getNgramCount(NgramType::Trigram), 50 extendedRegionSize); 51 return false; 52 } 53 return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); 54} 55 56bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, 57 const char *const dictDirPath) { 58 const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); 59 Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( 60 Ver4DictBuffers::createVer4DictBuffers(headerPolicy, 61 Ver4DictConstants::MAX_DICTIONARY_SIZE)); 62 MutableEntryCounters entryCounters; 63 if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &entryCounters)) { 64 return false; 65 } 66 BufferWithExtendableBuffer headerBuffer( 67 BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); 68 if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, 69 entryCounters.getEntryCounts(), 0 /* extendedRegionSize */, &headerBuffer)) { 70 return false; 71 } 72 return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); 73} 74 75bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, 76 const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, 77 MutableEntryCounters *const outEntryCounters) { 78 Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer()); 79 Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); 80 Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), 81 mBuffers->getTerminalPositionLookupTable()); 82 Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), 83 mBuffers, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy); 84 85 if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntriesForGC( 86 headerPolicy, outEntryCounters)) { 87 AKLOGE("Failed to update probabilities in language model dict content."); 88 return false; 89 } 90 if (headerPolicy->isDecayingDict()) { 91 const EntryCounts &maxEntryCounts = headerPolicy->getMaxNgramCounts(); 92 if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries( 93 outEntryCounters->getEntryCounts(), maxEntryCounts, headerPolicy, 94 outEntryCounters)) { 95 AKLOGE("Failed to truncate entries in language model dict content."); 96 return false; 97 } 98 } 99 100 DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); 101 readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 102 DynamicPtGcEventListeners 103 ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted 104 traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( 105 &ptNodeWriter); 106 if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 107 &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { 108 return false; 109 } 110 111 // Mapping from positions in mBuffer to positions in bufferToWrite. 112 PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; 113 readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 114 Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), 115 buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy); 116 DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer 117 traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, 118 buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); 119 if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 120 &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { 121 return false; 122 } 123 124 // Create policy instances for the GCed dictionary. 125 Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer()); 126 Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); 127 Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), 128 buffersToWrite->getTerminalPositionLookupTable()); 129 Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), 130 buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, 131 &newShortcutPolicy); 132 // Re-assign terminal IDs for valid terminal PtNodes. 133 TerminalPositionLookupTable::TerminalIdMap terminalIdMap; 134 if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( 135 &terminalIdMap)) { 136 return false; 137 } 138 // Run GC for language model dict content. 139 if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap, 140 mBuffers->getLanguageModelDictContent())) { 141 return false; 142 } 143 // Run GC for shortcut dict content. 144 if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap, 145 mBuffers->getShortcutDictContent())) { 146 return false; 147 } 148 DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); 149 newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 150 DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields 151 traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); 152 if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 153 &traversePolicyToUpdateAllPositionFields)) { 154 return false; 155 } 156 newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 157 TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds 158 traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap); 159 if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 160 &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) { 161 return false; 162 } 163 return true; 164} 165 166bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds 167 ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { 168 if (!ptNodeParams->isTerminal()) { 169 return true; 170 } 171 TerminalPositionLookupTable::TerminalIdMap::const_iterator it = 172 mTerminalIdMap->find(ptNodeParams->getTerminalId()); 173 if (it == mTerminalIdMap->end()) { 174 AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd", 175 ptNodeParams->getTerminalId(), mTerminalIdMap->size()); 176 return false; 177 } 178 if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) { 179 AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second); 180 return false; 181 } 182 return true; 183} 184 185} // namespace latinime 186