1dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi/* 2dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project 3dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * 4dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * you may not use this file except in compliance with the License. 6dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * You may obtain a copy of the License at 7dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * 8dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * 10dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * See the License for the specific language governing permissions and 14dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi * limitations under the License. 15dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi */ 16dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 17dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H 18dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H 19dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 20c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include <cstdio> 21063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi#include <vector> 22c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 23dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#include "defines.h" 2488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/word_attributes.h" 2588bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/content/language_model_dict_content_global_counters.h" 2688bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/content/probability_entry.h" 2788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/content/terminal_position_lookup_table.h" 2888bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/ver4_dict_constants.h" 2988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/entry_counters.h" 3088bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/trie_map.h" 31c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include "utils/byte_array_view.h" 3208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi#include "utils/int_array_view.h" 33dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 34dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanaginamespace latinime { 35dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 369aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagiclass HeaderPolicy; 379aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagi 3808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi/** 3908894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * Class representing language model. 4008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * 4108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * This class provides methods to get and store unigram/n-gram probability information and flags. 4208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi */ 43dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagiclass LanguageModelDictContent { 44dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi public: 4507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi // Pair of word id and probability entry used for iteration. 4607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi class WordIdAndProbabilityEntry { 4707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi public: 4807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi WordIdAndProbabilityEntry(const int wordId, const ProbabilityEntry &probabilityEntry) 4907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi : mWordId(wordId), mProbabilityEntry(probabilityEntry) {} 5007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 5107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi int getWordId() const { return mWordId; } 5207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const ProbabilityEntry getProbabilityEntry() const { return mProbabilityEntry; } 5307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 5407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi private: 5507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi DISALLOW_DEFAULT_CONSTRUCTOR(WordIdAndProbabilityEntry); 5607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi DISALLOW_ASSIGNMENT_OPERATOR(WordIdAndProbabilityEntry); 5707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 5807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const int mWordId; 5907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const ProbabilityEntry mProbabilityEntry; 6007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi }; 6107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 6207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi // Iterator. 6307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi class EntryIterator { 6407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi public: 6507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi EntryIterator(const TrieMap::TrieMapIterator &trieMapIterator, 6607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const bool hasHistoricalInfo) 6707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi : mTrieMapIterator(trieMapIterator), mHasHistoricalInfo(hasHistoricalInfo) {} 6807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 6907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const WordIdAndProbabilityEntry operator*() const { 7007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const TrieMap::TrieMapIterator::IterationResult &result = *mTrieMapIterator; 7107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi return WordIdAndProbabilityEntry( 7207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi result.key(), ProbabilityEntry::decode(result.value(), mHasHistoricalInfo)); 7307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi } 7407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 7507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi bool operator!=(const EntryIterator &other) const { 7607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi return mTrieMapIterator != other.mTrieMapIterator; 7707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi } 7807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 7907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const EntryIterator &operator++() { 8007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi ++mTrieMapIterator; 8107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi return *this; 8207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi } 8307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 8407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi private: 8507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi DISALLOW_DEFAULT_CONSTRUCTOR(EntryIterator); 8607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi DISALLOW_ASSIGNMENT_OPERATOR(EntryIterator); 8707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 8807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi TrieMap::TrieMapIterator mTrieMapIterator; 8907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const bool mHasHistoricalInfo; 9007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi }; 9107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 9207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi // Class represents range to use range base for loops. 9307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi class EntryRange { 9407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi public: 9507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi EntryRange(const TrieMap::TrieMapRange trieMapRange, const bool hasHistoricalInfo) 9607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi : mTrieMapRange(trieMapRange), mHasHistoricalInfo(hasHistoricalInfo) {} 9707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 9807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi EntryIterator begin() const { 9907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi return EntryIterator(mTrieMapRange.begin(), mHasHistoricalInfo); 10007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi } 10107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 10207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi EntryIterator end() const { 10307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi return EntryIterator(mTrieMapRange.end(), mHasHistoricalInfo); 10407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi } 10507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 10607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi private: 10707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi DISALLOW_DEFAULT_CONSTRUCTOR(EntryRange); 10807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi DISALLOW_ASSIGNMENT_OPERATOR(EntryRange); 10907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 11007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const TrieMap::TrieMapRange mTrieMapRange; 11107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi const bool mHasHistoricalInfo; 11207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi }; 11307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 114c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi class DumppedFullEntryInfo { 115c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi public: 116c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi DumppedFullEntryInfo(std::vector<int> &prevWordIds, const int targetWordId, 117c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const WordAttributes &wordAttributes, const ProbabilityEntry &probabilityEntry) 118c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi : mPrevWordIds(prevWordIds), mTargetWordId(targetWordId), 119c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi mWordAttributes(wordAttributes), mProbabilityEntry(probabilityEntry) {} 120c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi 121c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const WordIdArrayView getPrevWordIds() const { return WordIdArrayView(mPrevWordIds); } 122c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi int getTargetWordId() const { return mTargetWordId; } 123c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const WordAttributes &getWordAttributes() const { return mWordAttributes; } 124c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const ProbabilityEntry &getProbabilityEntry() const { return mProbabilityEntry; } 125c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi 126c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi private: 127c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi DISALLOW_ASSIGNMENT_OPERATOR(DumppedFullEntryInfo); 128c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi 129c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const std::vector<int> mPrevWordIds; 130c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const int mTargetWordId; 131c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const WordAttributes mWordAttributes; 132c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const ProbabilityEntry mProbabilityEntry; 133c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi }; 134c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi 1356b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi LanguageModelDictContent(const ReadWriteByteArrayView *const buffers, 136c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi const bool hasHistoricalInfo) 1376b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]), 1386b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]), 1396b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi mHasHistoricalInfo(hasHistoricalInfo) {} 140c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 14108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi explicit LanguageModelDictContent(const bool hasHistoricalInfo) 1426b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {} 14308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 14408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi bool isNearSizeLimit() const { 1456b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters(); 14608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi } 147c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 148c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi bool save(FILE *const file) const; 149dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi 15008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 15147fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi const LanguageModelDictContent *const originalContent); 15208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 1537d911d6f91af56586fbca40672bfb77b494ee871Keisuke Kuroyanagi const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId, 154bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi const bool mustMatchAllPrevWords, const HeaderPolicy *const headerPolicy) const; 155395fe8e98dc102fcad52ef34d281e83e3cd13f46Keisuke Kuroyanagi 156851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi ProbabilityEntry getProbabilityEntry(const int wordId) const { 157851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi return getNgramProbabilityEntry(WordIdArrayView(), wordId); 158851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi } 159851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi 160851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) { 161bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi mGlobalCounters.addToTotalCount(probabilityEntry->getHistoricalInfo()->getCount()); 162851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry); 163851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi } 164851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi 165b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi bool removeProbabilityEntry(const int wordId) { 166b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi return removeNgramProbabilityEntry(WordIdArrayView(), wordId); 167b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi } 168b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi 169851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds, 170851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi const int wordId) const; 17108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 172851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId, 17308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi const ProbabilityEntry *const probabilityEntry); 17408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 175b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId); 176b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi 17707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi EntryRange getProbabilityEntries(const WordIdArrayView prevWordIds) const; 17807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi 179c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi std::vector<DumppedFullEntryInfo> exportAllNgramEntriesRelatedToWord( 180c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const HeaderPolicy *const headerPolicy, const int wordId) const; 181c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi 1825400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi bool updateAllProbabilityEntriesForGC(const HeaderPolicy *const headerPolicy, 18347fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi MutableEntryCounters *const outEntryCounters) { 184bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi if (!updateAllProbabilityEntriesForGCInner(mTrieMap.getRootBitmapEntryIndex(), 185bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi 0 /* prevWordCount */, headerPolicy, mGlobalCounters.needsToHalveCounters(), 186bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi outEntryCounters)) { 187bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi return false; 188bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi } 189bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi if (mGlobalCounters.needsToHalveCounters()) { 190bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi mGlobalCounters.halveCounters(); 191bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi } 192bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi return true; 1939aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagi } 1949aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagi 195063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi // entryCounts should be created by updateAllProbabilityEntries. 19647fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi bool truncateEntries(const EntryCounts ¤tEntryCounts, const EntryCounts &maxEntryCounts, 19747fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi const HeaderPolicy *const headerPolicy, MutableEntryCounters *const outEntryCounters); 198063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi 1995400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi bool updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds, const int wordId, 2005400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi const bool isValid, const HistoricalInfo historicalInfo, 201e8750d970eed61b9239d8b2fa19648b8457696c1Keisuke Kuroyanagi const HeaderPolicy *const headerPolicy, 202e8750d970eed61b9239d8b2fa19648b8457696c1Keisuke Kuroyanagi MutableEntryCounters *const entryCountersToUpdate); 2035400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi 204dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi private: 205dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent); 206c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi 207063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi class EntryInfoToTurncate { 208063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi public: 209063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi class Comparator { 210063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi public: 211063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi bool operator()(const EntryInfoToTurncate &left, 212063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi const EntryInfoToTurncate &right) const; 213063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi private: 214063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi DISALLOW_ASSIGNMENT_OPERATOR(Comparator); 215063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi }; 216063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi 217bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi EntryInfoToTurncate(const int priority, const int count, const int key, 2183601c214f80cf62eecacd84b2fb27fe9c6b14a19Keisuke Kuroyanagi const int prevWordCount, const int *const prevWordIds); 219063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi 220bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi int mPriority; 221bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi // TODO: Remove. 222bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi int mCount; 223063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi int mKey; 2243601c214f80cf62eecacd84b2fb27fe9c6b14a19Keisuke Kuroyanagi int mPrevWordCount; 225063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; 226063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi 227063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi private: 228063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi DISALLOW_DEFAULT_CONSTRUCTOR(EntryInfoToTurncate); 229063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi }; 230063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi 2316b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi static const int TRIE_MAP_BUFFER_INDEX; 2326b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi static const int GLOBAL_COUNTERS_BUFFER_INDEX; 2335400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi 234c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi TrieMap mTrieMap; 2356b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi LanguageModelDictContentGlobalCounters mGlobalCounters; 23608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi const bool mHasHistoricalInfo; 23708894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi 23808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 23947fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex); 2409a23f0fba25137760a60e9bfaf6bf20a5889648cKeisuke Kuroyanagi int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds); 24103dc44f543795040a092723085fac1209103b7bdKeisuke Kuroyanagi int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const; 2423601c214f80cf62eecacd84b2fb27fe9c6b14a19Keisuke Kuroyanagi bool updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, const int prevWordCount, 243bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi const HeaderPolicy *const headerPolicy, const bool needsToHalveCounters, 244bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi MutableEntryCounters *const outEntryCounters); 245063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy, 246758d09364457b9d3d0c514a7fcfc8a6e317c9222Keisuke Kuroyanagi const int maxEntryCount, const int targetLevel, int *const outEntryCount); 247063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi bool getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel, 248063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi const int bitmapEntryIndex, std::vector<int> *const prevWordIds, 249063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi std::vector<EntryInfoToTurncate> *const outEntryInfo) const; 2505400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi const ProbabilityEntry createUpdatedEntryFrom(const ProbabilityEntry &originalProbabilityEntry, 2515400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi const bool isValid, const HistoricalInfo historicalInfo, 2525400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi const HeaderPolicy *const headerPolicy) const; 253c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi void exportAllNgramEntriesRelatedToWordInner(const HeaderPolicy *const headerPolicy, 254c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi const int bitmapEntryIndex, std::vector<int> *const prevWordIds, 255c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi std::vector<DumppedFullEntryInfo> *const outBummpedFullEntryInfo) const; 256dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi}; 257dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi} // namespace latinime 258dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */ 259