multi_bigram_map.h revision 35c62b2cc99761e97f57060ad5e3cdfad926aea7
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_MULTI_BIGRAM_MAP_H
18#define LATINIME_MULTI_BIGRAM_MAP_H
19
20#include <cstddef>
21#include <unordered_map>
22
23#include "defines.h"
24#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
25#include "suggest/core/dictionary/bloom_filter.h"
26#include "suggest/core/dictionary/ngram_listener.h"
27#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
28
29namespace latinime {
30
31// Class for caching bigram maps for multiple previous word contexts. This is useful since the
32// algorithm needs to look up the set of bigrams for every word pair that occurs in every
33// multi-word suggestion.
34class MultiBigramMap {
35 public:
36    MultiBigramMap() : mBigramMaps() {}
37    ~MultiBigramMap() {}
38
39    // Look up the bigram probability for the given word pair from the cached bigram maps.
40    // Also caches the bigrams if there is space remaining and they have not been cached already.
41    int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
42            const int *const prevWordsPtNodePos, const int nextWordPosition,
43            const int unigramProbability);
44
45    void clear() {
46        mBigramMaps.clear();
47    }
48
49 private:
50    DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
51
52    class BigramMap : public NgramListener {
53     public:
54        BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
55        // Copy constructor needed for std::unordered_map.
56        BigramMap(const BigramMap &bigramMap)
57                : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
58        virtual ~BigramMap() {}
59
60        void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
61                const int *const prevWordsPtNodePos);
62        int getBigramProbability(
63                const DictionaryStructureWithBufferPolicy *const structurePolicy,
64                const int nextWordPosition, const int unigramProbability) const;
65        virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
66
67     private:
68        static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
69        std::unordered_map<int, int> mBigramMap;
70        BloomFilter mBloomFilter;
71    };
72
73    void addBigramsForWordPosition(
74            const DictionaryStructureWithBufferPolicy *const structurePolicy,
75            const int *const prevWordsPtNodePos);
76
77    int readBigramProbabilityFromBinaryDictionary(
78            const DictionaryStructureWithBufferPolicy *const structurePolicy,
79            const int *const prevWordsPtNodePos, const int nextWordPosition,
80            const int unigramProbability);
81
82    static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
83    std::unordered_map<int, BigramMap> mBigramMaps;
84};
85} // namespace latinime
86#endif // LATINIME_MULTI_BIGRAM_MAP_H
87