/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
17dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
18dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
19dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
20c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include <cstdio>
21063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi#include <vector>
22c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
23dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#include "defines.h"
2488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/word_attributes.h"
2588bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
2688bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/content/probability_entry.h"
2788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/content/terminal_position_lookup_table.h"
2888bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/ver4_dict_constants.h"
2988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/entry_counters.h"
3088bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/trie_map.h"
31c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi#include "utils/byte_array_view.h"
3208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi#include "utils/int_array_view.h"
33dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
34dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanaginamespace latinime {
35dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
369aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagiclass HeaderPolicy;
379aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagi
3808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi/**
3908894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * Class representing language model.
4008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi *
4108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi * This class provides methods to get and store unigram/n-gram probability information and flags.
4208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi */
43dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagiclass LanguageModelDictContent {
44dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi public:
4507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    // Pair of word id and probability entry used for iteration.
4607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    class WordIdAndProbabilityEntry {
4707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi     public:
4807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        WordIdAndProbabilityEntry(const int wordId, const ProbabilityEntry &probabilityEntry)
4907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi                : mWordId(wordId), mProbabilityEntry(probabilityEntry) {}
5007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
5107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        int getWordId() const { return mWordId; }
5207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const ProbabilityEntry getProbabilityEntry() const { return mProbabilityEntry; }
5307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
5407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi     private:
5507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        DISALLOW_DEFAULT_CONSTRUCTOR(WordIdAndProbabilityEntry);
5607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        DISALLOW_ASSIGNMENT_OPERATOR(WordIdAndProbabilityEntry);
5707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
5807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const int mWordId;
5907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const ProbabilityEntry mProbabilityEntry;
6007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    };
6107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
6207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    // Iterator.
6307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    class EntryIterator {
6407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi     public:
6507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        EntryIterator(const TrieMap::TrieMapIterator &trieMapIterator,
6607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi                const bool hasHistoricalInfo)
6707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi                : mTrieMapIterator(trieMapIterator), mHasHistoricalInfo(hasHistoricalInfo) {}
6807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
6907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const WordIdAndProbabilityEntry operator*() const {
7007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            const TrieMap::TrieMapIterator::IterationResult &result = *mTrieMapIterator;
7107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            return WordIdAndProbabilityEntry(
7207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi                    result.key(), ProbabilityEntry::decode(result.value(), mHasHistoricalInfo));
7307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        }
7407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
7507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        bool operator!=(const EntryIterator &other) const {
7607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            return mTrieMapIterator != other.mTrieMapIterator;
7707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        }
7807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
7907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const EntryIterator &operator++() {
8007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            ++mTrieMapIterator;
8107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            return *this;
8207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        }
8307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
8407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi     private:
8507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        DISALLOW_DEFAULT_CONSTRUCTOR(EntryIterator);
8607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        DISALLOW_ASSIGNMENT_OPERATOR(EntryIterator);
8707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
8807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        TrieMap::TrieMapIterator mTrieMapIterator;
8907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const bool mHasHistoricalInfo;
9007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    };
9107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
9207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    // Class represents range to use range base for loops.
9307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    class EntryRange {
9407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi     public:
9507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        EntryRange(const TrieMap::TrieMapRange trieMapRange, const bool hasHistoricalInfo)
9607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi                : mTrieMapRange(trieMapRange), mHasHistoricalInfo(hasHistoricalInfo) {}
9707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
9807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        EntryIterator begin() const {
9907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            return EntryIterator(mTrieMapRange.begin(), mHasHistoricalInfo);
10007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        }
10107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
10207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        EntryIterator end() const {
10307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi            return EntryIterator(mTrieMapRange.end(), mHasHistoricalInfo);
10407b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        }
10507b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
10607b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi     private:
10707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        DISALLOW_DEFAULT_CONSTRUCTOR(EntryRange);
10807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        DISALLOW_ASSIGNMENT_OPERATOR(EntryRange);
10907b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
11007b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const TrieMap::TrieMapRange mTrieMapRange;
11107b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi        const bool mHasHistoricalInfo;
11207b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    };
11307b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
114c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi    class DumppedFullEntryInfo {
115c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi     public:
116c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        DumppedFullEntryInfo(std::vector<int> &prevWordIds, const int targetWordId,
117c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi                const WordAttributes &wordAttributes, const ProbabilityEntry &probabilityEntry)
118c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi                : mPrevWordIds(prevWordIds), mTargetWordId(targetWordId),
119c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi                  mWordAttributes(wordAttributes), mProbabilityEntry(probabilityEntry) {}
120c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi
121c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const WordIdArrayView getPrevWordIds() const { return WordIdArrayView(mPrevWordIds); }
122c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        int getTargetWordId() const { return mTargetWordId; }
123c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const WordAttributes &getWordAttributes() const { return mWordAttributes; }
124c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const ProbabilityEntry &getProbabilityEntry() const { return mProbabilityEntry; }
125c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi
126c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi     private:
127c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        DISALLOW_ASSIGNMENT_OPERATOR(DumppedFullEntryInfo);
128c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi
129c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const std::vector<int> mPrevWordIds;
130c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const int mTargetWordId;
131c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const WordAttributes mWordAttributes;
132c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi        const ProbabilityEntry mProbabilityEntry;
133c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi    };
134c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi
1356b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi    LanguageModelDictContent(const ReadWriteByteArrayView *const buffers,
136c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi            const bool hasHistoricalInfo)
1376b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi            : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]),
1386b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi              mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]),
1396b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi              mHasHistoricalInfo(hasHistoricalInfo) {}
140c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
14108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    explicit LanguageModelDictContent(const bool hasHistoricalInfo)
1426b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi            : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {}
14308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
14408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    bool isNearSizeLimit() const {
1456b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi        return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters();
14608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    }
147c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
148c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi    bool save(FILE *const file) const;
149dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi
15008894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
15147fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi            const LanguageModelDictContent *const originalContent);
15208894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
1537d911d6f91af56586fbca40672bfb77b494ee871Keisuke Kuroyanagi    const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId,
154bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi            const bool mustMatchAllPrevWords, const HeaderPolicy *const headerPolicy) const;
155395fe8e98dc102fcad52ef34d281e83e3cd13f46Keisuke Kuroyanagi
156851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    ProbabilityEntry getProbabilityEntry(const int wordId) const {
157851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi        return getNgramProbabilityEntry(WordIdArrayView(), wordId);
158851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    }
159851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi
160851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
161bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        mGlobalCounters.addToTotalCount(probabilityEntry->getHistoricalInfo()->getCount());
162851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi        return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
163851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    }
164851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi
165b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi    bool removeProbabilityEntry(const int wordId) {
166b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi        return removeNgramProbabilityEntry(WordIdArrayView(), wordId);
167b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi    }
168b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi
169851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
170851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi            const int wordId) const;
17108894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
172851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi    bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
17308894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi            const ProbabilityEntry *const probabilityEntry);
17408894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
175b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi    bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId);
176b4531d861ea740f1bf8e718f312150eb682e3f7bKeisuke Kuroyanagi
17707b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi    EntryRange getProbabilityEntries(const WordIdArrayView prevWordIds) const;
17807b3b41c25e000615396399e484a041df9301449Keisuke Kuroyanagi
179c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi    std::vector<DumppedFullEntryInfo> exportAllNgramEntriesRelatedToWord(
180c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi            const HeaderPolicy *const headerPolicy, const int wordId) const;
181c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi
1825400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi    bool updateAllProbabilityEntriesForGC(const HeaderPolicy *const headerPolicy,
18347fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi            MutableEntryCounters *const outEntryCounters) {
184bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        if (!updateAllProbabilityEntriesForGCInner(mTrieMap.getRootBitmapEntryIndex(),
185bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi                0 /* prevWordCount */, headerPolicy, mGlobalCounters.needsToHalveCounters(),
186bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi                outEntryCounters)) {
187bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi            return false;
188bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        }
189bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        if (mGlobalCounters.needsToHalveCounters()) {
190bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi            mGlobalCounters.halveCounters();
191bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        }
192bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        return true;
1939aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagi    }
1949aa6699107de4da356b8eb89fb3ca38100e19c9dKeisuke Kuroyanagi
195063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi    // entryCounts should be created by updateAllProbabilityEntries.
19647fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi    bool truncateEntries(const EntryCounts &currentEntryCounts, const EntryCounts &maxEntryCounts,
19747fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi            const HeaderPolicy *const headerPolicy, MutableEntryCounters *const outEntryCounters);
198063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi
1995400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi    bool updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds, const int wordId,
2005400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi            const bool isValid, const HistoricalInfo historicalInfo,
201e8750d970eed61b9239d8b2fa19648b8457696c1Keisuke Kuroyanagi            const HeaderPolicy *const headerPolicy,
202e8750d970eed61b9239d8b2fa19648b8457696c1Keisuke Kuroyanagi            MutableEntryCounters *const entryCountersToUpdate);
2035400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi
204dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi private:
205dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi    DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
206c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi
207063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi    class EntryInfoToTurncate {
208063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi     public:
209063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi        class Comparator {
210063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi         public:
211063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi            bool operator()(const EntryInfoToTurncate &left,
212063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi                    const EntryInfoToTurncate &right) const;
213063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi         private:
214063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi            DISALLOW_ASSIGNMENT_OPERATOR(Comparator);
215063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi        };
216063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi
217bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        EntryInfoToTurncate(const int priority, const int count, const int key,
2183601c214f80cf62eecacd84b2fb27fe9c6b14a19Keisuke Kuroyanagi                const int prevWordCount, const int *const prevWordIds);
219063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi
220bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        int mPriority;
221bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        // TODO: Remove.
222bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi        int mCount;
223063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi        int mKey;
2243601c214f80cf62eecacd84b2fb27fe9c6b14a19Keisuke Kuroyanagi        int mPrevWordCount;
225063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi        int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
226063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi
227063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi     private:
228063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi        DISALLOW_DEFAULT_CONSTRUCTOR(EntryInfoToTurncate);
229063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi    };
230063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi
2316b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi    static const int TRIE_MAP_BUFFER_INDEX;
2326b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi    static const int GLOBAL_COUNTERS_BUFFER_INDEX;
2335400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi
234c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi    TrieMap mTrieMap;
2356b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagi    LanguageModelDictContentGlobalCounters mGlobalCounters;
23608894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    const bool mHasHistoricalInfo;
23708894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi
23808894842662eff666a713a7f4deb79204a322f8cKeisuke Kuroyanagi    bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
23947fc656cd79a59dab0b9c38cd15e3a66d25c267fKeisuke Kuroyanagi            const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex);
2409a23f0fba25137760a60e9bfaf6bf20a5889648cKeisuke Kuroyanagi    int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds);
24103dc44f543795040a092723085fac1209103b7bdKeisuke Kuroyanagi    int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
2423601c214f80cf62eecacd84b2fb27fe9c6b14a19Keisuke Kuroyanagi    bool updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, const int prevWordCount,
243bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi            const HeaderPolicy *const headerPolicy, const bool needsToHalveCounters,
244bcb52d73e206cee86a2ea126a5c3f948103057c6Keisuke Kuroyanagi            MutableEntryCounters *const outEntryCounters);
245063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi    bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy,
246758d09364457b9d3d0c514a7fcfc8a6e317c9222Keisuke Kuroyanagi            const int maxEntryCount, const int targetLevel, int *const outEntryCount);
247063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi    bool getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel,
248063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi            const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
249063f86d40f2cb0d250b2166af8e1cf98ab135f8cKeisuke Kuroyanagi            std::vector<EntryInfoToTurncate> *const outEntryInfo) const;
2505400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi    const ProbabilityEntry createUpdatedEntryFrom(const ProbabilityEntry &originalProbabilityEntry,
2515400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi            const bool isValid, const HistoricalInfo historicalInfo,
2525400701908262c929a77141cc84567646053d032Keisuke Kuroyanagi            const HeaderPolicy *const headerPolicy) const;
253c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi    void exportAllNgramEntriesRelatedToWordInner(const HeaderPolicy *const headerPolicy,
254c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi            const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
255c9865785f41e3dcbe9308f653afc69603c1e44c0Keisuke Kuroyanagi            std::vector<DumppedFullEntryInfo> *const outBummpedFullEntryInfo) const;
256dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi};
257dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi} // namespace latinime
258dc3856d7589aa3cf3dcfdee8360fa48a85983273Keisuke Kuroyanagi#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
259