1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H 18#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H 19 20#include <vector> 21 22#include "defines.h" 23#include "dictionary/header/header_policy.h" 24#include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 25#include "dictionary/structure/pt_common/dynamic_pt_updating_helper.h" 26#include "dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" 27#include "dictionary/structure/v4/ver4_dict_buffers.h" 28#include "dictionary/structure/v4/ver4_patricia_trie_node_reader.h" 29#include "dictionary/structure/v4/ver4_patricia_trie_node_writer.h" 30#include "dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" 31#include "dictionary/structure/v4/ver4_pt_node_array_reader.h" 32#include "dictionary/utils/buffer_with_extendable_buffer.h" 33#include "dictionary/utils/entry_counters.h" 34#include "utils/int_array_view.h" 35 36namespace latinime { 37 38class DicNode; 39class DicNodeVector; 40 41// Word id = Artificial id that is stored in the PtNode looked up by the word. 42class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 43 public: 44 Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) 45 : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()), 46 mDictBuffer(mBuffers->getWritableTrieBuffer()), 47 mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), 48 mBuffers->getTerminalPositionLookupTable()), 49 mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer), 50 mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader, 51 &mShortcutPolicy), 52 mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), 53 mWritingHelper(mBuffers.get()), 54 mEntryCounters(mHeaderPolicy->getNgramCounts().getCountArray()), 55 mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; 56 57 AK_FORCE_INLINE int getRootPosition() const { 58 return 0; 59 } 60 61 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 62 DicNodeVector *const childDicNodes) const; 63 64 int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount, 65 int *const outCodePoints) const; 66 67 int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; 68 69 const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, 70 const int wordId, MultiBigramMap *const multiBigramMap) const; 71 72 // TODO: Remove 73 int getProbability(const int unigramProbability, const int bigramProbability) const { 74 // Not used. 75 return NOT_A_PROBABILITY; 76 } 77 78 int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const; 79 80 void iterateNgramEntries(const WordIdArrayView prevWordIds, 81 NgramListener *const listener) const; 82 83 BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const; 84 85 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 86 return mHeaderPolicy; 87 } 88 89 bool addUnigramEntry(const CodePointArrayView wordCodePoints, 90 const UnigramProperty *const unigramProperty); 91 92 bool removeUnigramEntry(const CodePointArrayView wordCodePoints); 93 94 bool addNgramEntry(const NgramProperty *const ngramProperty); 95 96 bool removeNgramEntry(const NgramContext *const ngramContext, 97 const CodePointArrayView wordCodePoints); 98 99 bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, 100 const CodePointArrayView wordCodePoints, const bool isValidWord, 101 const HistoricalInfo historicalInfo); 102 103 bool flush(const char *const filePath); 104 105 bool flushWithGC(const char *const filePath); 106 107 bool needsToRunGC(const bool mindsBlockByGC) const; 108 109 void getProperty(const char *const query, const int queryLength, char *const outResult, 110 const int maxResultLength); 111 112 const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const; 113 114 int getNextWordAndNextToken(const int token, int *const outCodePoints, 115 int *const outCodePointCount); 116 117 bool isCorrupted() const { 118 return mIsCorrupted; 119 } 120 121 private: 122 DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); 123 124 static const char *const UNIGRAM_COUNT_QUERY; 125 static const char *const BIGRAM_COUNT_QUERY; 126 static const char *const MAX_UNIGRAM_COUNT_QUERY; 127 static const char *const MAX_BIGRAM_COUNT_QUERY; 128 // When the dictionary size is near the maximum size, we have to refuse dynamic operations to 129 // prevent the dictionary from overflowing. 130 static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; 131 static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; 132 133 const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; 134 const HeaderPolicy *const mHeaderPolicy; 135 BufferWithExtendableBuffer *const mDictBuffer; 136 Ver4ShortcutListPolicy mShortcutPolicy; 137 Ver4PatriciaTrieNodeReader mNodeReader; 138 Ver4PtNodeArrayReader mPtNodeArrayReader; 139 Ver4PatriciaTrieNodeWriter mNodeWriter; 140 DynamicPtUpdatingHelper mUpdatingHelper; 141 Ver4PatriciaTrieWritingHelper mWritingHelper; 142 MutableEntryCounters mEntryCounters; 143 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 144 mutable bool mIsCorrupted; 145 146 int getShortcutPositionOfWord(const int wordId) const; 147}; 148} // namespace latinime 149#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H 150