1660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi/* 2660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project 3660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * 4660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 6660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * You may obtain a copy of the License at 7660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * 8660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * 10660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * See the License for the specific language governing permissions and 14660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi * limitations under the License. 15660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi */ 16660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 17660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#ifndef LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H 18660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#define LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H 19660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 20660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#include <algorithm> 21660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 22660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#include "defines.h" 2388bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/historical_info.h" 2478212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi#include "utils/ngram_utils.h" 25660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#include "utils/time_keeper.h" 26660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 27660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanaginamespace latinime { 28660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 29660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagiclass DynamicLanguageModelProbabilityUtils { 30660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi public: 31660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi static float computeRawProbabilityFromCounts(const int count, const int contextCount, 3278212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi const NgramType ngramType) { 3378212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi const int minCount = ASSUMED_MIN_COUNTS[static_cast<int>(ngramType)]; 34660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi return static_cast<float>(count) / static_cast<float>(std::max(contextCount, minCount)); 35660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi } 36660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 3778212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi static float backoff(const int ngramProbability, const NgramType ngramType) { 3878212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi const int probability = 3978212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi ngramProbability + ENCODED_BACKOFF_WEIGHTS[static_cast<int>(ngramType)]; 40660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi return std::min(std::max(probability, NOT_A_PROBABILITY), MAX_PROBABILITY); 41660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi } 42660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 43660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi static int getDecayedProbability(const int probability, const HistoricalInfo historicalInfo) { 44660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp(); 45660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi if (elapsedTime < 0) { 46660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi AKLOGE("The elapsed time is negatime value. Timestamp overflow?"); 47660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi return NOT_A_PROBABILITY; 48660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi } 49660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi // TODO: Improve this logic. 50660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi // We don't modify probability depending on the elapsed time. 51660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi return probability; 52660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi } 53660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 54660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi static int shouldRemoveEntryDuringGC(const HistoricalInfo historicalInfo) { 55660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi // TODO: Improve this logic. 56660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp(); 57660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi return elapsedTime > DURATION_TO_DISCARD_ENTRY_IN_SECONDS; 58660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi } 59660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 60660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi static int getPriorityToPreventFromEviction(const HistoricalInfo historicalInfo) { 61660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi // TODO: Improve this logic. 62660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi // More recently input entries get higher priority. 63660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi return historicalInfo.getTimestamp(); 64660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi } 65660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 66660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagiprivate: 67660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils); 68660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 6960021bbdc25b7cda864fb3d1bf47d4f0e977e7f9Keisuke Kuroyanagi static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram."); 70660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 7178212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi static const int ASSUMED_MIN_COUNTS[]; 7278212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi static const int ENCODED_BACKOFF_WEIGHTS[]; 73660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi static const int DURATION_TO_DISCARD_ENTRY_IN_SECONDS; 74660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi}; 75660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi 76660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi} // namespace latinime 77660b00477c980d74be48529b9de70d9725ffc72bKeisuke Kuroyanagi#endif /* LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H */ 78