language_model_dict_content.h revision b4531d861ea740f1bf8e718f312150eb682e3f7b
1dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi/* 2dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project 3dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * 4dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * you may not use this file except in compliance with the License. 6dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * You may obtain a copy of the License at 7dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * 8dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * 10dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * See the License for the specific language governing permissions and 14dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * limitations under the License. 15dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi */ 16dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 17dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H 18dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H 19dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 20c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include <cstdio> 21c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 22dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#include "defines.h" 2308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" 2408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" 2508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" 26c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include "suggest/policyimpl/dictionary/utils/trie_map.h" 27c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include "utils/byte_array_view.h" 2808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi#include "utils/int_array_view.h" 29dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 30dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanaginamespace latinime { 31dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 3208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi/** 3308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * Class representing language model. 3408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * 3508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * This class provides methods to get and store unigram/n-gram probability information and flags. 3608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi */ 37dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagiclass LanguageModelDictContent { 38dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi public: 39c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer, 40c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi const bool hasHistoricalInfo) 4108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {} 42c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 4308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi explicit LanguageModelDictContent(const bool hasHistoricalInfo) 4408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {} 4508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 4608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi bool isNearSizeLimit() const { 4708894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi return mTrieMap.isNearSizeLimit(); 4808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi } 49c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 50c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi bool save(FILE *const file) const; 51dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 5208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 5308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi const LanguageModelDictContent *const originalContent, 5408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi int *const outNgramCount); 5508894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 56851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi ProbabilityEntry getProbabilityEntry(const int wordId) const { 57851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi return getNgramProbabilityEntry(WordIdArrayView(), wordId); 58851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi } 59851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi 60851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) { 61851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry); 62851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi } 63851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi 64b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi bool removeProbabilityEntry(const int wordId) { 65b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi return removeNgramProbabilityEntry(WordIdArrayView(), wordId); 66b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi } 67b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi 68851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds, 69851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi const int wordId) const; 7008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 71851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId, 7208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi const ProbabilityEntry *const probabilityEntry); 7308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 74b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId); 75b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi 76dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi private: 77dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent); 78c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 79c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi TrieMap mTrieMap; 8008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi const bool mHasHistoricalInfo; 8108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 8208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 8308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex, 8408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi int *const outNgramCount); 859a23f0fba25137760a60e9bfaf6bf20a5889648cKeisuke Kuroyanagi int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds); 8603dc44f543795040a092723085fac1209103b7bdKeisuke Kuroyanagi int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const; 87dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi}; 88dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi} // namespace latinime 89dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */ 90