/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
17660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#ifndef LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H
18660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#define LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H
19660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
20660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#include <algorithm>
21660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
22660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#include "defines.h"
2388bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/historical_info.h"
2478212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi#include "utils/ngram_utils.h"
25660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#include "utils/time_keeper.h"
26660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
27660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanaginamespace latinime {
28660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
29660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagiclass DynamicLanguageModelProbabilityUtils {
30660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi public:
31660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    static float computeRawProbabilityFromCounts(const int count, const int contextCount,
3278212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi            const NgramType ngramType) {
3378212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi        const int minCount = ASSUMED_MIN_COUNTS[static_cast<int>(ngramType)];
34660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        return static_cast<float>(count) / static_cast<float>(std::max(contextCount, minCount));
35660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    }
36660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
3778212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi    static float backoff(const int ngramProbability, const NgramType ngramType) {
3878212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi        const int probability =
3978212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi                ngramProbability + ENCODED_BACKOFF_WEIGHTS[static_cast<int>(ngramType)];
40660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        return std::min(std::max(probability, NOT_A_PROBABILITY), MAX_PROBABILITY);
41660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    }
42660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
43660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    static int getDecayedProbability(const int probability, const HistoricalInfo historicalInfo) {
44660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp();
45660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        if (elapsedTime < 0) {
46660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi            AKLOGE("The elapsed time is negatime value. Timestamp overflow?");
47660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi            return NOT_A_PROBABILITY;
48660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        }
49660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        // TODO: Improve this logic.
50660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        // We don't modify probability depending on the elapsed time.
51660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        return probability;
52660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    }
53660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
54660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    static int shouldRemoveEntryDuringGC(const HistoricalInfo historicalInfo) {
55660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        // TODO: Improve this logic.
56660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp();
57660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        return elapsedTime > DURATION_TO_DISCARD_ENTRY_IN_SECONDS;
58660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    }
59660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
60660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    static int getPriorityToPreventFromEviction(const HistoricalInfo historicalInfo) {
61660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        // TODO: Improve this logic.
62660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        // More recently input entries get higher priority.
63660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi        return historicalInfo.getTimestamp();
64660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    }
65660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
66660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagiprivate:
67660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);
68660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
6960021bbdc25b7cda864fb3d1bf47d4f0e977e7f9Keisuke Kuroyanagi    static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram.");
70660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
7178212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi    static const int ASSUMED_MIN_COUNTS[];
7278212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi    static const int ENCODED_BACKOFF_WEIGHTS[];
73660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi    static const int DURATION_TO_DISCARD_ENTRY_IN_SECONDS;
74660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi};
75660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi
76660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi} // namespace latinime
77660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#endif /* LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H */
78