ver4_patricia_trie_policy.h revision bd1f59bda5ad0b7028ec06c2de078f1623e76cdd
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H 18#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H 19 20#include <vector> 21 22#include "defines.h" 23#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 24#include "suggest/policyimpl/dictionary/header/header_policy.h" 25#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" 26#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" 27#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" 28#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" 29#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" 30#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" 31#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" 32#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" 33#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 34 35namespace latinime { 36 37class DicNode; 38class DicNodeVector; 39 40class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 41 public: 42 Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) 43 : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()), 44 mDictBuffer(mBuffers->getWritableTrieBuffer()), 45 mBigramPolicy(mBuffers->getMutableBigramDictContent(), 46 mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy), 47 mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), 48 mBuffers->getTerminalPositionLookupTable()), 49 mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy), 50 mPtNodeArrayReader(mDictBuffer), 51 mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader, 52 &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), 53 mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), 54 mWritingHelper(mBuffers.get()), 55 mUnigramCount(mHeaderPolicy->getUnigramCount()), 56 mBigramCount(mHeaderPolicy->getBigramCount()), 57 mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; 58 59 AK_FORCE_INLINE int getRootPosition() const { 60 return 0; 61 } 62 63 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 64 DicNodeVector *const childDicNodes) const; 65 66 int getCodePointsAndProbabilityAndReturnCodePointCount( 67 const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, 68 int *const outUnigramProbability) const; 69 70 int getTerminalPtNodePositionOfWord(const int *const inWord, 71 const int length, const bool forceLowerCaseSearch) const; 72 73 int getProbability(const int unigramProbability, const int bigramProbability) const; 74 75 int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const; 76 77 void iterateNgramEntries(const int *const prevWordsPtNodePos, 78 NgramListener *const listener) const; 79 80 int getShortcutPositionOfPtNode(const int ptNodePos) const; 81 82 BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; 83 84 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 85 return mHeaderPolicy; 86 } 87 88 const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { 89 return &mShortcutPolicy; 90 } 91 92 bool addUnigramEntry(const int *const word, const int length, 93 const UnigramProperty *const unigramProperty); 94 95 bool removeUnigramEntry(const int *const word, const int length); 96 97 bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 98 const BigramProperty *const bigramProperty); 99 100 bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, 101 const int length1); 102 103 bool flush(const char *const filePath); 104 105 bool flushWithGC(const char *const filePath); 106 107 bool needsToRunGC(const bool mindsBlockByGC) const; 108 109 void getProperty(const char *const query, const int queryLength, char *const outResult, 110 const int maxResultLength); 111 112 const WordProperty getWordProperty(const int *const codePoints, 113 const int codePointCount) const; 114 115 int getNextWordAndNextToken(const int token, int *const outCodePoints, 116 int *const outCodePointCount); 117 118 bool isCorrupted() const { 119 return mIsCorrupted; 120 } 121 122 private: 123 DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); 124 125 static const char *const UNIGRAM_COUNT_QUERY; 126 static const char *const BIGRAM_COUNT_QUERY; 127 static const char *const MAX_UNIGRAM_COUNT_QUERY; 128 static const char *const MAX_BIGRAM_COUNT_QUERY; 129 // When the dictionary size is near the maximum size, we have to refuse dynamic operations to 130 // prevent the dictionary from overflowing. 131 static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; 132 static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; 133 134 const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; 135 const HeaderPolicy *const mHeaderPolicy; 136 BufferWithExtendableBuffer *const mDictBuffer; 137 Ver4BigramListPolicy mBigramPolicy; 138 Ver4ShortcutListPolicy mShortcutPolicy; 139 Ver4PatriciaTrieNodeReader mNodeReader; 140 Ver4PtNodeArrayReader mPtNodeArrayReader; 141 Ver4PatriciaTrieNodeWriter mNodeWriter; 142 DynamicPtUpdatingHelper mUpdatingHelper; 143 Ver4PatriciaTrieWritingHelper mWritingHelper; 144 int mUnigramCount; 145 int mBigramCount; 146 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 147 mutable bool mIsCorrupted; 148 149 int getBigramsPositionOfPtNode(const int ptNodePos) const; 150}; 151} // namespace latinime 152#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H 153