1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "utils/autocorrection_threshold_utils.h" 18 19#include <algorithm> 20#include <cmath> 21 22#include "defines.h" 23#include "suggest/policyimpl/utils/edit_distance.h" 24#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h" 25 26namespace latinime { 27 28const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255; 29const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2; 30const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2; 31 32/* static */ int AutocorrectionThresholdUtils::editDistance(const int *before, 33 const int beforeLength, const int *after, const int afterLength) { 34 const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( 35 before, beforeLength, after, afterLength); 36 return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein)); 37} 38 39// In dictionary.cpp, getSuggestion() method, 40// When USE_SUGGEST_INTERFACE_FOR_TYPING is true: 41// 42// // TODO: Revise the following logic thoroughly by referring to the logic 43// // marked as "Otherwise" below. 44// SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert 45// them to integers. 46// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE) 47// Undo the scaling here to recover the original score. 48// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE 49// 50// Otherwise: suggestion scores are computed using the below formula. 51// original score 52// := powf(mTypedLetterMultiplier (this is defined 2), 53// (the number of matched characters between typed word and suggested word)) 54// * (individual word's score which defined in the unigram dictionary, 55// and this score is defined in range [0, 255].) 56// Then, the following processing is applied. 57// - If the dictionary word is matched up to the point of the user entry 58// (full match up to min(before.length(), after.length()) 59// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) 60// - If the word is a true full match except for differences in accents or 61// capitalization, then treat it as if the score was 255. 62// - If before.length() == after.length() 63// => multiply by mFullWordMultiplier (this is defined 2)) 64// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2 65// For historical reasons we ignore the 1.2 modifier (because the measure for a good 66// autocorrection threshold was done at a time when it didn't exist). This doesn't change 67// the result. 68// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. 69 70/* static */ float AutocorrectionThresholdUtils::calcNormalizedScore(const int *before, 71 const int beforeLength, const int *after, const int afterLength, const int score) { 72 if (0 == beforeLength || 0 == afterLength) { 73 return 0.0f; 74 } 75 const int distance = editDistance(before, beforeLength, after, afterLength); 76 int spaceCount = 0; 77 for (int i = 0; i < afterLength; ++i) { 78 if (after[i] == KEYCODE_SPACE) { 79 ++spaceCount; 80 } 81 } 82 83 if (spaceCount == afterLength) { 84 return 0.0f; 85 } 86 87 if (score <= 0 || distance >= afterLength) { 88 // normalizedScore must be 0.0f (the minimum value) if the score is less than or equal to 0, 89 // or if the edit distance is larger than or equal to afterLength. 90 return 0.0f; 91 } 92 // add a weight based on edit distance. 93 const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); 94 95 // TODO: Revise the following logic thoroughly by referring to... 96 if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) { 97 return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight; 98 } 99 // ...this logic. 100 const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX) 101 : static_cast<float>(MAX_INITIAL_SCORE) 102 * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), 103 static_cast<float>(std::min(beforeLength, afterLength - spaceCount))) 104 * static_cast<float>(FULL_WORD_MULTIPLIER); 105 106 return (static_cast<float>(score) / maxScore) * weight; 107} 108 109} // namespace latinime 110