patricia_trie_policy.h revision bd1f59bda5ad0b7028ec06c2de078f1623e76cdd
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18#define LATINIME_PATRICIA_TRIE_POLICY_H
19
20#include <cstdint>
21#include <vector>
22
23#include "defines.h"
24#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
25#include "suggest/policyimpl/dictionary/header/header_policy.h"
26#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
27#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
28#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
29#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
30#include "suggest/policyimpl/dictionary/utils/format_utils.h"
31#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
32
33namespace latinime {
34
35class DicNode;
36class DicNodeVector;
37
38class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
39 public:
40    PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
41            : mMmappedBuffer(std::move(mmappedBuffer)),
42              mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
43              mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
44              mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
45              mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
46              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
47              mPtNodeArrayReader(mDictRoot, mDictBufferSize),
48              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
49
50    AK_FORCE_INLINE int getRootPosition() const {
51        return 0;
52    }
53
54    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
55            DicNodeVector *const childDicNodes) const;
56
57    int getCodePointsAndProbabilityAndReturnCodePointCount(
58            const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
59            int *const outUnigramProbability) const;
60
61    int getTerminalPtNodePositionOfWord(const int *const inWord,
62            const int length, const bool forceLowerCaseSearch) const;
63
64    int getProbability(const int unigramProbability, const int bigramProbability) const;
65
66    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
67
68    void iterateNgramEntries(const int *const prevWordsPtNodePos,
69            NgramListener *const listener) const;
70
71    int getShortcutPositionOfPtNode(const int ptNodePos) const;
72
73    BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
74
75    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
76        return &mHeaderPolicy;
77    }
78
79    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
80        return &mShortcutListPolicy;
81    }
82
83    bool addUnigramEntry(const int *const word, const int length,
84            const UnigramProperty *const unigramProperty) {
85        // This method should not be called for non-updatable dictionary.
86        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
87        return false;
88    }
89
90    bool removeUnigramEntry(const int *const word, const int length) {
91        // This method should not be called for non-updatable dictionary.
92        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
93        return false;
94    }
95
96    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
97            const BigramProperty *const bigramProperty) {
98        // This method should not be called for non-updatable dictionary.
99        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
100        return false;
101    }
102
103    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
104            const int length) {
105        // This method should not be called for non-updatable dictionary.
106        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
107        return false;
108    }
109
110    bool flush(const char *const filePath) {
111        // This method should not be called for non-updatable dictionary.
112        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
113        return false;
114    }
115
116    bool flushWithGC(const char *const filePath) {
117        // This method should not be called for non-updatable dictionary.
118        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
119        return false;
120    }
121
122    bool needsToRunGC(const bool mindsBlockByGC) const {
123        // This method should not be called for non-updatable dictionary.
124        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
125        return false;
126    }
127
128    void getProperty(const char *const query, const int queryLength, char *const outResult,
129            const int maxResultLength) {
130        // getProperty is not supported for this class.
131        if (maxResultLength > 0) {
132            outResult[0] = '\0';
133        }
134    }
135
136    const WordProperty getWordProperty(const int *const codePoints,
137            const int codePointCount) const;
138
139    int getNextWordAndNextToken(const int token, int *const outCodePoints,
140            int *const outCodePointCount);
141
142    bool isCorrupted() const {
143        return mIsCorrupted;
144    }
145
146 private:
147    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
148
149    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
150    const HeaderPolicy mHeaderPolicy;
151    const uint8_t *const mDictRoot;
152    const int mDictBufferSize;
153    const BigramListPolicy mBigramListPolicy;
154    const ShortcutListPolicy mShortcutListPolicy;
155    const Ver2ParticiaTrieNodeReader mPtNodeReader;
156    const Ver2PtNodeArrayReader mPtNodeArrayReader;
157    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
158    mutable bool mIsCorrupted;
159
160    int getBigramsPositionOfPtNode(const int ptNodePos) const;
161    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
162            DicNodeVector *const childDicNodes) const;
163};
164} // namespace latinime
165#endif // LATINIME_PATRICIA_TRIE_POLICY_H
166