1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
18#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
19
20#include <vector>
21
22#include "defines.h"
23#include "dictionary/header/header_policy.h"
24#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
25#include "dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
26#include "dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
27#include "dictionary/structure/v4/ver4_dict_buffers.h"
28#include "dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
29#include "dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
30#include "dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
31#include "dictionary/structure/v4/ver4_pt_node_array_reader.h"
32#include "dictionary/utils/buffer_with_extendable_buffer.h"
33#include "dictionary/utils/entry_counters.h"
34#include "utils/int_array_view.h"
35
36namespace latinime {
37
38class DicNode;
39class DicNodeVector;
40
41// Word id = Artificial id that is stored in the PtNode looked up by the word.
42class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
43 public:
44    Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
45            : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
46              mDictBuffer(mBuffers->getWritableTrieBuffer()),
47              mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
48                      mBuffers->getTerminalPositionLookupTable()),
49              mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
50              mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader,
51                      &mShortcutPolicy),
52              mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
53              mWritingHelper(mBuffers.get()),
54              mEntryCounters(mHeaderPolicy->getNgramCounts().getCountArray()),
55              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
56
57    AK_FORCE_INLINE int getRootPosition() const {
58        return 0;
59    }
60
61    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
62            DicNodeVector *const childDicNodes) const;
63
64    int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
65            int *const outCodePoints) const;
66
67    int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
68
69    const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
70            const int wordId, MultiBigramMap *const multiBigramMap) const;
71
72    // TODO: Remove
73    int getProbability(const int unigramProbability, const int bigramProbability) const {
74        // Not used.
75        return NOT_A_PROBABILITY;
76    }
77
78    int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;
79
80    void iterateNgramEntries(const WordIdArrayView prevWordIds,
81            NgramListener *const listener) const;
82
83    BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;
84
85    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
86        return mHeaderPolicy;
87    }
88
89    bool addUnigramEntry(const CodePointArrayView wordCodePoints,
90            const UnigramProperty *const unigramProperty);
91
92    bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
93
94    bool addNgramEntry(const NgramProperty *const ngramProperty);
95
96    bool removeNgramEntry(const NgramContext *const ngramContext,
97            const CodePointArrayView wordCodePoints);
98
99    bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
100            const CodePointArrayView wordCodePoints, const bool isValidWord,
101            const HistoricalInfo historicalInfo);
102
103    bool flush(const char *const filePath);
104
105    bool flushWithGC(const char *const filePath);
106
107    bool needsToRunGC(const bool mindsBlockByGC) const;
108
109    void getProperty(const char *const query, const int queryLength, char *const outResult,
110            const int maxResultLength);
111
112    const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
113
114    int getNextWordAndNextToken(const int token, int *const outCodePoints,
115            int *const outCodePointCount);
116
117    bool isCorrupted() const {
118        return mIsCorrupted;
119    }
120
121 private:
122    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
123
124    static const char *const UNIGRAM_COUNT_QUERY;
125    static const char *const BIGRAM_COUNT_QUERY;
126    static const char *const MAX_UNIGRAM_COUNT_QUERY;
127    static const char *const MAX_BIGRAM_COUNT_QUERY;
128    // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
129    // prevent the dictionary from overflowing.
130    static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
131    static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
132
133    const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
134    const HeaderPolicy *const mHeaderPolicy;
135    BufferWithExtendableBuffer *const mDictBuffer;
136    Ver4ShortcutListPolicy mShortcutPolicy;
137    Ver4PatriciaTrieNodeReader mNodeReader;
138    Ver4PtNodeArrayReader mPtNodeArrayReader;
139    Ver4PatriciaTrieNodeWriter mNodeWriter;
140    DynamicPtUpdatingHelper mUpdatingHelper;
141    Ver4PatriciaTrieWritingHelper mWritingHelper;
142    MutableEntryCounters mEntryCounters;
143    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
144    mutable bool mIsCorrupted;
145
146    int getShortcutPositionOfWord(const int wordId) const;
147};
148} // namespace latinime
149#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
150