1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18#define LATINIME_PATRICIA_TRIE_POLICY_H
19
20#include <cstdint>
21#include <vector>
22
23#include "defines.h"
24#include "dictionary/header/header_policy.h"
25#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
26#include "dictionary/structure/v2/bigram/bigram_list_policy.h"
27#include "dictionary/structure/v2/shortcut/shortcut_list_policy.h"
28#include "dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
29#include "dictionary/structure/v2/ver2_pt_node_array_reader.h"
30#include "dictionary/utils/format_utils.h"
31#include "dictionary/utils/mmapped_buffer.h"
32#include "utils/byte_array_view.h"
33#include "utils/int_array_view.h"
34
35namespace latinime {
36
37class DicNode;
38class DicNodeVector;
39
40// Word id = Position of a PtNode that represents the word.
41// Max supported n-gram is bigram.
42class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
43 public:
44    PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
45            : mMmappedBuffer(std::move(mmappedBuffer)),
46              mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
47                      FormatUtils::detectFormatVersion(mMmappedBuffer->getReadOnlyByteArrayView())),
48              mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
49              mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer),
50              mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy,
51                      mHeaderPolicy.getCodePointTable()),
52              mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(),
53              mIsCorrupted(false) {}
54
55    AK_FORCE_INLINE int getRootPosition() const {
56        return 0;
57    }
58
59    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
60            DicNodeVector *const childDicNodes) const;
61
62    int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
63            int *const outCodePoints) const;
64
65    int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
66
67    const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
68            const int wordId, MultiBigramMap *const multiBigramMap) const;
69
70    int getProbability(const int unigramProbability, const int bigramProbability) const;
71
72    int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;
73
74    void iterateNgramEntries(const WordIdArrayView prevWordIds,
75            NgramListener *const listener) const;
76
77    BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;
78
79    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
80        return &mHeaderPolicy;
81    }
82
83    bool addUnigramEntry(const CodePointArrayView wordCodePoints,
84            const UnigramProperty *const unigramProperty) {
85        // This method should not be called for non-updatable dictionary.
86        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
87        return false;
88    }
89
90    bool removeUnigramEntry(const CodePointArrayView wordCodePoints) {
91        // This method should not be called for non-updatable dictionary.
92        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
93        return false;
94    }
95
96    bool addNgramEntry(const NgramProperty *const ngramProperty) {
97        // This method should not be called for non-updatable dictionary.
98        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
99        return false;
100    }
101
102    bool removeNgramEntry(const NgramContext *const ngramContext,
103            const CodePointArrayView wordCodePoints) {
104        // This method should not be called for non-updatable dictionary.
105        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
106        return false;
107    }
108
109    bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
110            const CodePointArrayView wordCodePoints, const bool isValidWord,
111            const HistoricalInfo historicalInfo) {
112        // This method should not be called for non-updatable dictionary.
113        AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
114                "dictionary.");
115        return false;
116    }
117
118    bool flush(const char *const filePath) {
119        // This method should not be called for non-updatable dictionary.
120        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
121        return false;
122    }
123
124    bool flushWithGC(const char *const filePath) {
125        // This method should not be called for non-updatable dictionary.
126        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
127        return false;
128    }
129
130    bool needsToRunGC(const bool mindsBlockByGC) const {
131        // This method should not be called for non-updatable dictionary.
132        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
133        return false;
134    }
135
136    void getProperty(const char *const query, const int queryLength, char *const outResult,
137            const int maxResultLength) {
138        // getProperty is not supported for this class.
139        if (maxResultLength > 0) {
140            outResult[0] = '\0';
141        }
142    }
143
144    const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
145
146    int getNextWordAndNextToken(const int token, int *const outCodePoints,
147            int *const outCodePointCount);
148
149    bool isCorrupted() const {
150        return mIsCorrupted;
151    }
152
153 private:
154    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
155
156    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
157    const HeaderPolicy mHeaderPolicy;
158    const ReadOnlyByteArrayView mBuffer;
159    const BigramListPolicy mBigramListPolicy;
160    const ShortcutListPolicy mShortcutListPolicy;
161    const Ver2ParticiaTrieNodeReader mPtNodeReader;
162    const Ver2PtNodeArrayReader mPtNodeArrayReader;
163    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
164    mutable bool mIsCorrupted;
165
166    int getCodePointsAndProbabilityAndReturnCodePointCount(const int wordId,
167            const int maxCodePointCount, int *const outCodePoints,
168            int *const outUnigramProbability) const;
169    int getShortcutPositionOfPtNode(const int ptNodePos) const;
170    int getBigramsPositionOfPtNode(const int ptNodePos) const;
171    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
172            DicNodeVector *const childDicNodes) const;
173    int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
174    int getTerminalPtNodePosFromWordId(const int wordId) const;
175    const WordAttributes getWordAttributes(const int probability,
176            const PtNodeParams &ptNodeParams) const;
177    bool isValidPos(const int pos) const;
178};
179} // namespace latinime
180#endif // LATINIME_PATRICIA_TRIE_POLICY_H
181