// patricia_trie_policy.h revision b00973952f269ebee6d1d5f808fad7ca64fb9954
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_PATRICIA_TRIE_POLICY_H 18#define LATINIME_PATRICIA_TRIE_POLICY_H 19 20#include <cstdint> 21#include <vector> 22 23#include "defines.h" 24#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 25#include "suggest/policyimpl/dictionary/header/header_policy.h" 26#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h" 27#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h" 28#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" 29#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" 30#include "suggest/policyimpl/dictionary/utils/format_utils.h" 31#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" 32 33namespace latinime { 34 35class DicNode; 36class DicNodeVector; 37 38class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 39 public: 40 PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) 41 : mMmappedBuffer(std::move(mmappedBuffer)), 42 mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2), 43 mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()), 44 mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()), 45 mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), 46 mPtNodeReader(mDictRoot, 
mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), 47 mPtNodeArrayReader(mDictRoot, mDictBufferSize), 48 mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} 49 50 AK_FORCE_INLINE int getRootPosition() const { 51 return 0; 52 } 53 54 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 55 DicNodeVector *const childDicNodes) const; 56 57 int getCodePointsAndProbabilityAndReturnCodePointCount( 58 const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, 59 int *const outUnigramProbability) const; 60 61 int getTerminalPtNodePositionOfWord(const int *const inWord, 62 const int length, const bool forceLowerCaseSearch) const; 63 64 int getProbability(const int unigramProbability, const int bigramProbability) const; 65 66 int getUnigramProbabilityOfPtNode(const int ptNodePos) const; 67 68 int getShortcutPositionOfPtNode(const int ptNodePos) const; 69 70 BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; 71 72 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 73 return &mHeaderPolicy; 74 } 75 76 const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { 77 return &mShortcutListPolicy; 78 } 79 80 bool addUnigramEntry(const int *const word, const int length, 81 const UnigramProperty *const unigramProperty) { 82 // This method should not be called for non-updatable dictionary. 83 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 84 return false; 85 } 86 87 bool removeUnigramEntry(const int *const word, const int length) { 88 // This method should not be called for non-updatable dictionary. 89 AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary."); 90 return false; 91 } 92 93 bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 94 const BigramProperty *const bigramProperty) { 95 // This method should not be called for non-updatable dictionary. 
96 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 97 return false; 98 } 99 100 bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, 101 const int length) { 102 // This method should not be called for non-updatable dictionary. 103 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 104 return false; 105 } 106 107 bool flush(const char *const filePath) { 108 // This method should not be called for non-updatable dictionary. 109 AKLOGI("Warning: flush() is called for non-updatable dictionary."); 110 return false; 111 } 112 113 bool flushWithGC(const char *const filePath) { 114 // This method should not be called for non-updatable dictionary. 115 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 116 return false; 117 } 118 119 bool needsToRunGC(const bool mindsBlockByGC) const { 120 // This method should not be called for non-updatable dictionary. 121 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 122 return false; 123 } 124 125 void getProperty(const char *const query, const int queryLength, char *const outResult, 126 const int maxResultLength) { 127 // getProperty is not supported for this class. 
128 if (maxResultLength > 0) { 129 outResult[0] = '\0'; 130 } 131 } 132 133 const WordProperty getWordProperty(const int *const codePoints, 134 const int codePointCount) const; 135 136 int getNextWordAndNextToken(const int token, int *const outCodePoints, 137 int *const outCodePointCount); 138 139 bool isCorrupted() const { 140 return mIsCorrupted; 141 } 142 143 private: 144 DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); 145 146 const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; 147 const HeaderPolicy mHeaderPolicy; 148 const uint8_t *const mDictRoot; 149 const int mDictBufferSize; 150 const BigramListPolicy mBigramListPolicy; 151 const ShortcutListPolicy mShortcutListPolicy; 152 const Ver2ParticiaTrieNodeReader mPtNodeReader; 153 const Ver2PtNodeArrayReader mPtNodeArrayReader; 154 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 155 mutable bool mIsCorrupted; 156 157 int getBigramsPositionOfPtNode(const int ptNodePos) const; 158 int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, 159 DicNodeVector *const childDicNodes) const; 160}; 161} // namespace latinime 162#endif // LATINIME_PATRICIA_TRIE_POLICY_H 163