1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PROBABILITY_ENTRY_H
18#define LATINIME_PROBABILITY_ENTRY_H
19
20#include <climits>
21#include <cstdint>
22
23#include "defines.h"
24#include "dictionary/property/historical_info.h"
25#include "dictionary/property/ngram_property.h"
26#include "dictionary/property/unigram_property.h"
27#include "dictionary/structure/v4/ver4_dict_constants.h"
28
29namespace latinime {
30
31class ProbabilityEntry {
32 public:
33    ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
34            : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
35              mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
36
37    // Dummy entry
38    ProbabilityEntry()
39            : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
40              mHistoricalInfo() {}
41
42    // Entry without historical information
43    ProbabilityEntry(const int flags, const int probability)
44            : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
45
46    // Entry with historical information.
47    ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo)
48            : mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {}
49
50    // Create from unigram property.
51    ProbabilityEntry(const UnigramProperty *const unigramProperty)
52            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
53                    unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
54                    unigramProperty->isPossiblyOffensive())),
55              mProbability(unigramProperty->getProbability()),
56              mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
57
58    // Create from ngram property.
59    // TODO: Set flags.
60    ProbabilityEntry(const NgramProperty *const ngramProperty)
61            : mFlags(0), mProbability(ngramProperty->getProbability()),
62              mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
63
64    bool isValid() const {
65        return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
66    }
67
68    bool hasHistoricalInfo() const {
69        return mHistoricalInfo.isValid();
70    }
71
72    uint8_t getFlags() const {
73        return mFlags;
74    }
75
76    int getProbability() const {
77        return mProbability;
78    }
79
80    const HistoricalInfo *getHistoricalInfo() const {
81        return &mHistoricalInfo;
82    }
83
84    bool representsBeginningOfSentence() const {
85        return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
86    }
87
88    bool isNotAWord() const {
89        return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0;
90    }
91
92    bool isBlacklisted() const {
93        return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0;
94    }
95
96    bool isPossiblyOffensive() const {
97        return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0;
98    }
99
100    uint64_t encode(const bool hasHistoricalInfo) const {
101        uint64_t encodedEntry = static_cast<uint8_t>(mFlags);
102        if (hasHistoricalInfo) {
103            encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
104                    | static_cast<uint32_t>(mHistoricalInfo.getTimestamp());
105            encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
106                    | static_cast<uint8_t>(mHistoricalInfo.getLevel());
107            encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
108                    | static_cast<uint16_t>(mHistoricalInfo.getCount());
109        } else {
110            encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
111                    | static_cast<uint8_t>(mProbability);
112        }
113        return encodedEntry;
114    }
115
116    static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
117        if (hasHistoricalInfo) {
118            const int flags = readFromEncodedEntry(encodedEntry,
119                    Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
120                    Ver4DictConstants::TIME_STAMP_FIELD_SIZE
121                            + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
122                            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
123            const int timestamp = readFromEncodedEntry(encodedEntry,
124                    Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
125                    Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
126                            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
127            const int level = readFromEncodedEntry(encodedEntry,
128                    Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
129                    Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
130            const int count = readFromEncodedEntry(encodedEntry,
131                    Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
132            const HistoricalInfo historicalInfo(timestamp, level, count);
133            return ProbabilityEntry(flags, &historicalInfo);
134        } else {
135            const int flags = readFromEncodedEntry(encodedEntry,
136                    Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
137                    Ver4DictConstants::PROBABILITY_SIZE);
138            const int probability = readFromEncodedEntry(encodedEntry,
139                    Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
140            return ProbabilityEntry(flags, probability);
141        }
142    }
143
144 private:
145    // Copy constructor is public to use this class as a type of return value.
146    DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
147
148    const uint8_t mFlags;
149    const int mProbability;
150    const HistoricalInfo mHistoricalInfo;
151
152    static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
153        return static_cast<int>(
154                (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
155    }
156
157    static uint8_t createFlags(const bool representsBeginningOfSentence,
158            const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
159        uint8_t flags = 0;
160        if (representsBeginningOfSentence) {
161            flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
162        }
163        if (isNotAWord) {
164            flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
165        }
166        if (isBlacklisted) {
167            flags |= Ver4DictConstants::FLAG_BLACKLISTED;
168        }
169        if (isPossiblyOffensive) {
170            flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
171        }
172        return flags;
173    }
174};
175} // namespace latinime
176#endif /* LATINIME_PROBABILITY_ENTRY_H */
177