// ver4_patricia_trie_policy.h revision bd1f59bda5ad0b7028ec06c2de078f1623e76cdd
/*
 * Copyright (C) 2013, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
18#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
19
20#include <vector>
21
22#include "defines.h"
23#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
24#include "suggest/policyimpl/dictionary/header/header_policy.h"
25#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
26#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
27#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
28#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
29#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
30#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
31#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
32#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
33#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
34
35namespace latinime {
36
37class DicNode;
38class DicNodeVector;
39
40class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
41 public:
42    Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
43            : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
44              mDictBuffer(mBuffers->getWritableTrieBuffer()),
45              mBigramPolicy(mBuffers->getMutableBigramDictContent(),
46                      mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
47              mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
48                      mBuffers->getTerminalPositionLookupTable()),
49              mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
50              mPtNodeArrayReader(mDictBuffer),
51              mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
52                      &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
53              mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
54              mWritingHelper(mBuffers.get()),
55              mUnigramCount(mHeaderPolicy->getUnigramCount()),
56              mBigramCount(mHeaderPolicy->getBigramCount()),
57              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
58
59    AK_FORCE_INLINE int getRootPosition() const {
60        return 0;
61    }
62
63    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
64            DicNodeVector *const childDicNodes) const;
65
66    int getCodePointsAndProbabilityAndReturnCodePointCount(
67            const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
68            int *const outUnigramProbability) const;
69
70    int getTerminalPtNodePositionOfWord(const int *const inWord,
71            const int length, const bool forceLowerCaseSearch) const;
72
73    int getProbability(const int unigramProbability, const int bigramProbability) const;
74
75    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
76
77    void iterateNgramEntries(const int *const prevWordsPtNodePos,
78            NgramListener *const listener) const;
79
80    int getShortcutPositionOfPtNode(const int ptNodePos) const;
81
82    BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
83
84    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
85        return mHeaderPolicy;
86    }
87
88    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
89        return &mShortcutPolicy;
90    }
91
92    bool addUnigramEntry(const int *const word, const int length,
93            const UnigramProperty *const unigramProperty);
94
95    bool removeUnigramEntry(const int *const word, const int length);
96
97    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
98            const BigramProperty *const bigramProperty);
99
100    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
101            const int length1);
102
103    bool flush(const char *const filePath);
104
105    bool flushWithGC(const char *const filePath);
106
107    bool needsToRunGC(const bool mindsBlockByGC) const;
108
109    void getProperty(const char *const query, const int queryLength, char *const outResult,
110            const int maxResultLength);
111
112    const WordProperty getWordProperty(const int *const codePoints,
113            const int codePointCount) const;
114
115    int getNextWordAndNextToken(const int token, int *const outCodePoints,
116            int *const outCodePointCount);
117
118    bool isCorrupted() const {
119        return mIsCorrupted;
120    }
121
122 private:
123    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
124
125    static const char *const UNIGRAM_COUNT_QUERY;
126    static const char *const BIGRAM_COUNT_QUERY;
127    static const char *const MAX_UNIGRAM_COUNT_QUERY;
128    static const char *const MAX_BIGRAM_COUNT_QUERY;
129    // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
130    // prevent the dictionary from overflowing.
131    static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
132    static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
133
134    const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
135    const HeaderPolicy *const mHeaderPolicy;
136    BufferWithExtendableBuffer *const mDictBuffer;
137    Ver4BigramListPolicy mBigramPolicy;
138    Ver4ShortcutListPolicy mShortcutPolicy;
139    Ver4PatriciaTrieNodeReader mNodeReader;
140    Ver4PtNodeArrayReader mPtNodeArrayReader;
141    Ver4PatriciaTrieNodeWriter mNodeWriter;
142    DynamicPtUpdatingHelper mUpdatingHelper;
143    Ver4PatriciaTrieWritingHelper mWritingHelper;
144    int mUnigramCount;
145    int mBigramCount;
146    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
147    mutable bool mIsCorrupted;
148
149    int getBigramsPositionOfPtNode(const int ptNodePos) const;
150};
151} // namespace latinime
152#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
153