patricia_trie_policy.h revision c0c674cdc0721a374e140ad5ee1409c0498b3262
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18#define LATINIME_PATRICIA_TRIE_POLICY_H
19
20#include <cstdint>
21#include <vector>
22
23#include "defines.h"
24#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
25#include "suggest/policyimpl/dictionary/header/header_policy.h"
26#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
27#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
28#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
29#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
30#include "suggest/policyimpl/dictionary/utils/format_utils.h"
31#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
32#include "utils/byte_array_view.h"
33
34namespace latinime {
35
36class DicNode;
37class DicNodeVector;
38
39class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
40 public:
41    PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
42            : mMmappedBuffer(std::move(mmappedBuffer)),
43              mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
44                      FormatUtils::VERSION_2),
45              mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
46                      + mHeaderPolicy.getSize()),
47              mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
48                      - mHeaderPolicy.getSize()),
49              mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
50              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
51              mPtNodeArrayReader(mDictRoot, mDictBufferSize),
52              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
53
54    AK_FORCE_INLINE int getRootPosition() const {
55        return 0;
56    }
57
58    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
59            DicNodeVector *const childDicNodes) const;
60
61    int getCodePointsAndProbabilityAndReturnCodePointCount(
62            const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
63            int *const outUnigramProbability) const;
64
65    int getTerminalPtNodePositionOfWord(const int *const inWord,
66            const int length, const bool forceLowerCaseSearch) const;
67
68    int getProbability(const int unigramProbability, const int bigramProbability) const;
69
70    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
71
72    void iterateNgramEntries(const int *const prevWordsPtNodePos,
73            NgramListener *const listener) const;
74
75    int getShortcutPositionOfPtNode(const int ptNodePos) const;
76
77    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
78        return &mHeaderPolicy;
79    }
80
81    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
82        return &mShortcutListPolicy;
83    }
84
85    bool addUnigramEntry(const int *const word, const int length,
86            const UnigramProperty *const unigramProperty) {
87        // This method should not be called for non-updatable dictionary.
88        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
89        return false;
90    }
91
92    bool removeUnigramEntry(const int *const word, const int length) {
93        // This method should not be called for non-updatable dictionary.
94        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
95        return false;
96    }
97
98    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
99            const BigramProperty *const bigramProperty) {
100        // This method should not be called for non-updatable dictionary.
101        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
102        return false;
103    }
104
105    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
106            const int length) {
107        // This method should not be called for non-updatable dictionary.
108        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
109        return false;
110    }
111
112    bool flush(const char *const filePath) {
113        // This method should not be called for non-updatable dictionary.
114        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
115        return false;
116    }
117
118    bool flushWithGC(const char *const filePath) {
119        // This method should not be called for non-updatable dictionary.
120        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
121        return false;
122    }
123
124    bool needsToRunGC(const bool mindsBlockByGC) const {
125        // This method should not be called for non-updatable dictionary.
126        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
127        return false;
128    }
129
130    void getProperty(const char *const query, const int queryLength, char *const outResult,
131            const int maxResultLength) {
132        // getProperty is not supported for this class.
133        if (maxResultLength > 0) {
134            outResult[0] = '\0';
135        }
136    }
137
138    const WordProperty getWordProperty(const int *const codePoints,
139            const int codePointCount) const;
140
141    int getNextWordAndNextToken(const int token, int *const outCodePoints,
142            int *const outCodePointCount);
143
144    bool isCorrupted() const {
145        return mIsCorrupted;
146    }
147
148 private:
149    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
150
151    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
152    const HeaderPolicy mHeaderPolicy;
153    const uint8_t *const mDictRoot;
154    const int mDictBufferSize;
155    const BigramListPolicy mBigramListPolicy;
156    const ShortcutListPolicy mShortcutListPolicy;
157    const Ver2ParticiaTrieNodeReader mPtNodeReader;
158    const Ver2PtNodeArrayReader mPtNodeArrayReader;
159    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
160    mutable bool mIsCorrupted;
161
162    int getBigramsPositionOfPtNode(const int ptNodePos) const;
163    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
164            DicNodeVector *const childDicNodes) const;
165};
166} // namespace latinime
167#endif // LATINIME_PATRICIA_TRIE_POLICY_H
168