patricia_trie_policy.h revision b00973952f269ebee6d1d5f808fad7ca64fb9954
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18#define LATINIME_PATRICIA_TRIE_POLICY_H
19
20#include <cstdint>
21#include <vector>
22
23#include "defines.h"
24#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
25#include "suggest/policyimpl/dictionary/header/header_policy.h"
26#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
27#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
28#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
29#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
30#include "suggest/policyimpl/dictionary/utils/format_utils.h"
31#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
32
33namespace latinime {
34
35class DicNode;
36class DicNodeVector;
37
38class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
39 public:
40    PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
41            : mMmappedBuffer(std::move(mmappedBuffer)),
42              mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
43              mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
44              mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
45              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
46              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
47              mPtNodeArrayReader(mDictRoot, mDictBufferSize),
48              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
49
50    AK_FORCE_INLINE int getRootPosition() const {
51        return 0;
52    }
53
54    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
55            DicNodeVector *const childDicNodes) const;
56
57    int getCodePointsAndProbabilityAndReturnCodePointCount(
58            const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
59            int *const outUnigramProbability) const;
60
61    int getTerminalPtNodePositionOfWord(const int *const inWord,
62            const int length, const bool forceLowerCaseSearch) const;
63
64    int getProbability(const int unigramProbability, const int bigramProbability) const;
65
66    int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
67
68    int getShortcutPositionOfPtNode(const int ptNodePos) const;
69
70    BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
71
72    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
73        return &mHeaderPolicy;
74    }
75
76    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
77        return &mShortcutListPolicy;
78    }
79
80    bool addUnigramEntry(const int *const word, const int length,
81            const UnigramProperty *const unigramProperty) {
82        // This method should not be called for non-updatable dictionary.
83        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
84        return false;
85    }
86
87    bool removeUnigramEntry(const int *const word, const int length) {
88        // This method should not be called for non-updatable dictionary.
89        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
90        return false;
91    }
92
93    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
94            const BigramProperty *const bigramProperty) {
95        // This method should not be called for non-updatable dictionary.
96        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
97        return false;
98    }
99
100    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
101            const int length) {
102        // This method should not be called for non-updatable dictionary.
103        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
104        return false;
105    }
106
107    bool flush(const char *const filePath) {
108        // This method should not be called for non-updatable dictionary.
109        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
110        return false;
111    }
112
113    bool flushWithGC(const char *const filePath) {
114        // This method should not be called for non-updatable dictionary.
115        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
116        return false;
117    }
118
119    bool needsToRunGC(const bool mindsBlockByGC) const {
120        // This method should not be called for non-updatable dictionary.
121        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
122        return false;
123    }
124
125    void getProperty(const char *const query, const int queryLength, char *const outResult,
126            const int maxResultLength) {
127        // getProperty is not supported for this class.
128        if (maxResultLength > 0) {
129            outResult[0] = '\0';
130        }
131    }
132
133    const WordProperty getWordProperty(const int *const codePoints,
134            const int codePointCount) const;
135
136    int getNextWordAndNextToken(const int token, int *const outCodePoints,
137            int *const outCodePointCount);
138
139    bool isCorrupted() const {
140        return mIsCorrupted;
141    }
142
143 private:
144    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
145
146    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
147    const HeaderPolicy mHeaderPolicy;
148    const uint8_t *const mDictRoot;
149    const int mDictBufferSize;
150    const BigramListPolicy mBigramListPolicy;
151    const ShortcutListPolicy mShortcutListPolicy;
152    const Ver2ParticiaTrieNodeReader mPtNodeReader;
153    const Ver2PtNodeArrayReader mPtNodeArrayReader;
154    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
155    mutable bool mIsCorrupted;
156
157    int getBigramsPositionOfPtNode(const int ptNodePos) const;
158    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
159            DicNodeVector *const childDicNodes) const;
160};
161} // namespace latinime
162#endif // LATINIME_PATRICIA_TRIE_POLICY_H
163