// language_model_dict_content.h revision b4531d861ea740f1bf8e718f312150eb682e3f7b
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H

#include <cstdio>

#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/trie_map.h"
#include "utils/byte_array_view.h"
#include "utils/int_array_view.h"

30dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanaginamespace latinime {
31dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
3208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi/**
3308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * Class representing language model.
3408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi *
3508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * This class provides methods to get and store unigram/n-gram probability information and flags.
3608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi */
37dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagiclass LanguageModelDictContent {
38dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi public:
39c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi    LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
40c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi            const bool hasHistoricalInfo)
4108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
42c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
4308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    explicit LanguageModelDictContent(const bool hasHistoricalInfo)
4408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
4508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
4608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    bool isNearSizeLimit() const {
4708894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi        return mTrieMap.isNearSizeLimit();
4808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    }
49c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
50c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi    bool save(FILE *const file) const;
51dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
5208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
5308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            const LanguageModelDictContent *const originalContent,
5408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            int *const outNgramCount);
5508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
56851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    ProbabilityEntry getProbabilityEntry(const int wordId) const {
57851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi        return getNgramProbabilityEntry(WordIdArrayView(), wordId);
58851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    }
59851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi
60851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
61851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi        return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
62851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    }
63851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi
64b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi    bool removeProbabilityEntry(const int wordId) {
65b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi        return removeNgramProbabilityEntry(WordIdArrayView(), wordId);
66b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi    }
67b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi
68851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
69851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi            const int wordId) const;
7008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
71851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
7208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            const ProbabilityEntry *const probabilityEntry);
7308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
74b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi    bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId);
75b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi
76dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi private:
77dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi    DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
78c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
79c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi    TrieMap mTrieMap;
8008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    const bool mHasHistoricalInfo;
8108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
8208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
8308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
8408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            int *const outNgramCount);
859a23f0fba25137760a60e9bfaf6bf20a5889648cKeisuke Kuroyanagi    int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds);
8603dc44f543795040a092723085fac1209103b7bdKeisuke Kuroyanagi    int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
87dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi};
88dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi} // namespace latinime
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */