// patricia_trie_policy.h revision bd1f59bda5ad0b7028ec06c2de078f1623e76cdd
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_PATRICIA_TRIE_POLICY_H 18#define LATINIME_PATRICIA_TRIE_POLICY_H 19 20#include <cstdint> 21#include <vector> 22 23#include "defines.h" 24#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 25#include "suggest/policyimpl/dictionary/header/header_policy.h" 26#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h" 27#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h" 28#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" 29#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" 30#include "suggest/policyimpl/dictionary/utils/format_utils.h" 31#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" 32 33namespace latinime { 34 35class DicNode; 36class DicNodeVector; 37 38class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 39 public: 40 PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) 41 : mMmappedBuffer(std::move(mmappedBuffer)), 42 mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2), 43 mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()), 44 mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()), 45 mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot), 46 
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), 47 mPtNodeArrayReader(mDictRoot, mDictBufferSize), 48 mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} 49 50 AK_FORCE_INLINE int getRootPosition() const { 51 return 0; 52 } 53 54 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 55 DicNodeVector *const childDicNodes) const; 56 57 int getCodePointsAndProbabilityAndReturnCodePointCount( 58 const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, 59 int *const outUnigramProbability) const; 60 61 int getTerminalPtNodePositionOfWord(const int *const inWord, 62 const int length, const bool forceLowerCaseSearch) const; 63 64 int getProbability(const int unigramProbability, const int bigramProbability) const; 65 66 int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const; 67 68 void iterateNgramEntries(const int *const prevWordsPtNodePos, 69 NgramListener *const listener) const; 70 71 int getShortcutPositionOfPtNode(const int ptNodePos) const; 72 73 BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; 74 75 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 76 return &mHeaderPolicy; 77 } 78 79 const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { 80 return &mShortcutListPolicy; 81 } 82 83 bool addUnigramEntry(const int *const word, const int length, 84 const UnigramProperty *const unigramProperty) { 85 // This method should not be called for non-updatable dictionary. 86 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 87 return false; 88 } 89 90 bool removeUnigramEntry(const int *const word, const int length) { 91 // This method should not be called for non-updatable dictionary. 
92 AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary."); 93 return false; 94 } 95 96 bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 97 const BigramProperty *const bigramProperty) { 98 // This method should not be called for non-updatable dictionary. 99 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 100 return false; 101 } 102 103 bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, 104 const int length) { 105 // This method should not be called for non-updatable dictionary. 106 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 107 return false; 108 } 109 110 bool flush(const char *const filePath) { 111 // This method should not be called for non-updatable dictionary. 112 AKLOGI("Warning: flush() is called for non-updatable dictionary."); 113 return false; 114 } 115 116 bool flushWithGC(const char *const filePath) { 117 // This method should not be called for non-updatable dictionary. 118 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 119 return false; 120 } 121 122 bool needsToRunGC(const bool mindsBlockByGC) const { 123 // This method should not be called for non-updatable dictionary. 124 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 125 return false; 126 } 127 128 void getProperty(const char *const query, const int queryLength, char *const outResult, 129 const int maxResultLength) { 130 // getProperty is not supported for this class. 
131 if (maxResultLength > 0) { 132 outResult[0] = '\0'; 133 } 134 } 135 136 const WordProperty getWordProperty(const int *const codePoints, 137 const int codePointCount) const; 138 139 int getNextWordAndNextToken(const int token, int *const outCodePoints, 140 int *const outCodePointCount); 141 142 bool isCorrupted() const { 143 return mIsCorrupted; 144 } 145 146 private: 147 DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); 148 149 const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; 150 const HeaderPolicy mHeaderPolicy; 151 const uint8_t *const mDictRoot; 152 const int mDictBufferSize; 153 const BigramListPolicy mBigramListPolicy; 154 const ShortcutListPolicy mShortcutListPolicy; 155 const Ver2ParticiaTrieNodeReader mPtNodeReader; 156 const Ver2PtNodeArrayReader mPtNodeArrayReader; 157 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 158 mutable bool mIsCorrupted; 159 160 int getBigramsPositionOfPtNode(const int ptNodePos) const; 161 int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, 162 DicNodeVector *const childDicNodes) const; 163}; 164} // namespace latinime 165#endif // LATINIME_PATRICIA_TRIE_POLICY_H 166