1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.utils;
18
19import com.android.inputmethod.latin.BinaryDictionary;
20import com.android.inputmethod.latin.Dictionary;
21import com.android.inputmethod.latin.LatinImeLogger;
22import com.android.inputmethod.latin.Suggest;
23import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
24
25import android.text.TextUtils;
26import android.util.Log;
27
28import java.util.concurrent.ConcurrentHashMap;
29
30public final class AutoCorrectionUtils {
31    private static final boolean DBG = LatinImeLogger.sDBG;
32    private static final String TAG = AutoCorrectionUtils.class.getSimpleName();
33    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
34
35    private AutoCorrectionUtils() {
36        // Purely static class: can't instantiate.
37    }
38
39    public static boolean isValidWord(final Suggest suggest, final String word,
40            final boolean ignoreCase) {
41        if (TextUtils.isEmpty(word)) {
42            return false;
43        }
44        final ConcurrentHashMap<String, Dictionary> dictionaries = suggest.getUnigramDictionaries();
45        final String lowerCasedWord = word.toLowerCase(suggest.mLocale);
46        for (final String key : dictionaries.keySet()) {
47            final Dictionary dictionary = dictionaries.get(key);
48            // It's unclear how realistically 'dictionary' can be null, but the monkey is somehow
49            // managing to get null in here. Presumably the language is changing to a language with
50            // no main dictionary and the monkey manages to type a whole word before the thread
51            // that reads the dictionary is started or something?
52            // Ideally the passed map would come out of a {@link java.util.concurrent.Future} and
53            // would be immutable once it's finished initializing, but concretely a null test is
54            // probably good enough for the time being.
55            if (null == dictionary) continue;
56            if (dictionary.isValidWord(word)
57                    || (ignoreCase && dictionary.isValidWord(lowerCasedWord))) {
58                return true;
59            }
60        }
61        return false;
62    }
63
64    public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
65            final String word) {
66        if (TextUtils.isEmpty(word)) {
67            return Dictionary.NOT_A_PROBABILITY;
68        }
69        int maxFreq = -1;
70        for (final String key : dictionaries.keySet()) {
71            final Dictionary dictionary = dictionaries.get(key);
72            if (null == dictionary) continue;
73            final int tempFreq = dictionary.getFrequency(word);
74            if (tempFreq >= maxFreq) {
75                maxFreq = tempFreq;
76            }
77        }
78        return maxFreq;
79    }
80
81    public static boolean suggestionExceedsAutoCorrectionThreshold(
82            final SuggestedWordInfo suggestion, final String consideredWord,
83            final float autoCorrectionThreshold) {
84        if (null != suggestion) {
85            // Shortlist a whitelisted word
86            if (suggestion.mKind == SuggestedWordInfo.KIND_WHITELIST) return true;
87            final int autoCorrectionSuggestionScore = suggestion.mScore;
88            // TODO: when the normalized score of the first suggestion is nearly equals to
89            //       the normalized score of the second suggestion, behave less aggressive.
90            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
91                    consideredWord, suggestion.mWord, autoCorrectionSuggestionScore);
92            if (DBG) {
93                Log.d(TAG, "Normalized " + consideredWord + "," + suggestion + ","
94                        + autoCorrectionSuggestionScore + ", " + normalizedScore
95                        + "(" + autoCorrectionThreshold + ")");
96            }
97            if (normalizedScore >= autoCorrectionThreshold) {
98                if (DBG) {
99                    Log.d(TAG, "Auto corrected by S-threshold.");
100                }
101                return !shouldBlockAutoCorrectionBySafetyNet(consideredWord, suggestion.mWord);
102            }
103        }
104        return false;
105    }
106
107    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
108    // this safety net
109    public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
110            final String suggestion) {
111        // Safety net for auto correction.
112        // Actually if we hit this safety net, it's a bug.
113        // If user selected aggressive auto correction mode, there is no need to use the safety
114        // net.
115        // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
116        // we should not use net because relatively edit distance can be big.
117        final int typedWordLength = typedWord.length();
118        if (typedWordLength < MINIMUM_SAFETY_NET_CHAR_LENGTH) {
119            return false;
120        }
121        final int maxEditDistanceOfNativeDictionary =
122                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
123        final int distance = BinaryDictionary.editDistance(typedWord, suggestion);
124        if (DBG) {
125            Log.d(TAG, "Autocorrected edit distance = " + distance
126                    + ", " + maxEditDistanceOfNativeDictionary);
127        }
128        if (distance > maxEditDistanceOfNativeDictionary) {
129            if (DBG) {
130                Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
131                Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
132                        + "Turning off auto-correction.");
133            }
134            return true;
135        } else {
136            return false;
137        }
138    }
139}
140