language_model_dict_content.h revision 851e0458fe460526b1f953e39a1e406a21ab4647
1/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
18#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
19
20#include <cstdio>
21
22#include "defines.h"
23#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
24#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
25#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
26#include "suggest/policyimpl/dictionary/utils/trie_map.h"
27#include "utils/byte_array_view.h"
28#include "utils/int_array_view.h"
29
30namespace latinime {
31
32/**
33 * Class representing language model.
34 *
35 * This class provides methods to get and store unigram/n-gram probability information and flags.
36 */
37class LanguageModelDictContent {
38 public:
39    LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
40            const bool hasHistoricalInfo)
41            : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
42
43    explicit LanguageModelDictContent(const bool hasHistoricalInfo)
44            : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
45
46    bool isNearSizeLimit() const {
47        return mTrieMap.isNearSizeLimit();
48    }
49
50    bool save(FILE *const file) const;
51
52    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
53            const LanguageModelDictContent *const originalContent,
54            int *const outNgramCount);
55
56    ProbabilityEntry getProbabilityEntry(const int wordId) const {
57        return getNgramProbabilityEntry(WordIdArrayView(), wordId);
58    }
59
60    bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
61        return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
62    }
63
64    ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
65            const int wordId) const;
66
67    bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
68            const ProbabilityEntry *const probabilityEntry);
69
70 private:
71    DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
72
73    TrieMap mTrieMap;
74    const bool mHasHistoricalInfo;
75
76    bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
77            const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
78            int *const outNgramCount);
79};
80} // namespace latinime
81#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
82