AndroidSpellCheckerService.java revision fee149abe0358ff0efcebff3d0b60d8be83af437
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.res.Resources;
21import android.service.textservice.SpellCheckerService;
22import android.text.TextUtils;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25import android.view.textservice.TextInfo;
26
27import com.android.inputmethod.compat.ArraysCompatUtils;
28import com.android.inputmethod.keyboard.ProximityInfo;
29import com.android.inputmethod.latin.BinaryDictionary;
30import com.android.inputmethod.latin.Dictionary;
31import com.android.inputmethod.latin.Dictionary.DataType;
32import com.android.inputmethod.latin.Dictionary.WordCallback;
33import com.android.inputmethod.latin.DictionaryCollection;
34import com.android.inputmethod.latin.DictionaryFactory;
35import com.android.inputmethod.latin.Flag;
36import com.android.inputmethod.latin.LocaleUtils;
37import com.android.inputmethod.latin.R;
38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
39import com.android.inputmethod.latin.Utils;
40import com.android.inputmethod.latin.WhitelistDictionary;
41import com.android.inputmethod.latin.WordComposer;
42
43import java.util.ArrayList;
44import java.util.Arrays;
45import java.util.Collections;
46import java.util.Locale;
47import java.util.Map;
48import java.util.TreeMap;
49
50/**
51 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
52 */
53public class AndroidSpellCheckerService extends SpellCheckerService {
54    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
55    private static final boolean DBG = false;
56    private static final int POOL_SIZE = 2;
57
58    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
59    private static final int CAPITALIZE_FIRST = 1; // First only
60    private static final int CAPITALIZE_ALL = 2; // All caps
61
62    private final static String[] EMPTY_STRING_ARRAY = new String[0];
63    private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS =
64            new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
65    private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS =
66            new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
67                    EMPTY_STRING_ARRAY);
68    private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY;
69    static {
70        // See BinaryDictionary.java for an explanation of these flags
71        // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the
72        // current dictionary configuration - for example, consider the UMLAUT flag
73        // so that it will be turned on for German dictionaries and off for others.
74        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS,
75                BinaryDictionary.ALL_CONFIG_FLAGS.length + 1);
76        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] =
77                BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE;
78    }
79    private Map<String, DictionaryPool> mDictionaryPools =
80            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
81    private Map<String, Dictionary> mUserDictionaries =
82            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
83    private Map<String, Dictionary> mWhitelistDictionaries =
84            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
85
86    // The threshold for a candidate to be offered as a suggestion.
87    private double mSuggestionThreshold;
88    // The threshold for a suggestion to be considered "likely".
89    private double mLikelyThreshold;
90
91    @Override public void onCreate() {
92        super.onCreate();
93        mSuggestionThreshold =
94                Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
95        mLikelyThreshold =
96                Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
97    }
98
99    @Override
100    public Session createSession() {
101        return new AndroidSpellCheckerSession(this);
102    }
103
104    private static class SuggestionsGatherer implements WordCallback {
105        public static class Result {
106            public final String[] mSuggestions;
107            public final boolean mHasLikelySuggestions;
108            public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
109                mSuggestions = gatheredSuggestions;
110                mHasLikelySuggestions = hasLikelySuggestions;
111            }
112        }
113
114        private final ArrayList<CharSequence> mSuggestions;
115        private final int[] mScores;
116        private final String mOriginalText;
117        private final double mSuggestionThreshold;
118        private final double mLikelyThreshold;
119        private final int mMaxLength;
120        private int mLength = 0;
121
122        // The two following attributes are only ever filled if the requested max length
123        // is 0 (or less, which is treated the same).
124        private String mBestSuggestion = null;
125        private int mBestScore = Integer.MIN_VALUE; // As small as possible
126
127        SuggestionsGatherer(final String originalText, final double suggestionThreshold,
128                final double likelyThreshold, final int maxLength) {
129            mOriginalText = originalText;
130            mSuggestionThreshold = suggestionThreshold;
131            mLikelyThreshold = likelyThreshold;
132            mMaxLength = maxLength;
133            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
134            mScores = new int[mMaxLength];
135        }
136
137        @Override
138        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
139                int dicTypeId, DataType dataType) {
140            final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
141            // binarySearch returns the index if the element exists, and -<insertion index> - 1
142            // if it doesn't. See documentation for binarySearch.
143            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
144
145            if (insertIndex == 0 && mLength >= mMaxLength) {
146                // In the future, we may want to keep track of the best suggestion score even if
147                // we are asked for 0 suggestions. In this case, we can use the following
148                // (tested) code to keep it:
149                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
150                // then we need to keep track of the best suggestion in mBestScore and
151                // mBestSuggestion. This is so that we know whether the best suggestion makes
152                // the score cutoff, since we need to know that to return a meaningful
153                // looksLikeTypo.
154                // if (0 >= mMaxLength) {
155                //     if (score > mBestScore) {
156                //         mBestScore = score;
157                //         mBestSuggestion = new String(word, wordOffset, wordLength);
158                //     }
159                // }
160                return true;
161            }
162            if (insertIndex >= mMaxLength) {
163                // We found a suggestion, but its score is too weak to be kept considering
164                // the suggestion limit.
165                return true;
166            }
167
168            // Compute the normalized score and skip this word if it's normalized score does not
169            // make the threshold.
170            final String wordString = new String(word, wordOffset, wordLength);
171            final double normalizedScore =
172                    Utils.calcNormalizedScore(mOriginalText, wordString, score);
173            if (normalizedScore < mSuggestionThreshold) {
174                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
175                return true;
176            }
177
178            if (mLength < mMaxLength) {
179                final int copyLen = mLength - insertIndex;
180                ++mLength;
181                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
182                mSuggestions.add(insertIndex, wordString);
183            } else {
184                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
185                mSuggestions.add(insertIndex, wordString);
186                mSuggestions.remove(0);
187            }
188            mScores[insertIndex] = score;
189
190            return true;
191        }
192
193        public Result getResults(final int capitalizeType, final Locale locale) {
194            final String[] gatheredSuggestions;
195            final boolean hasLikelySuggestions;
196            if (0 == mLength) {
197                // Either we found no suggestions, or we found some BUT the max length was 0.
198                // If we found some mBestSuggestion will not be null. If it is null, then
199                // we found none, regardless of the max length.
200                if (null == mBestSuggestion) {
201                    gatheredSuggestions = null;
202                    hasLikelySuggestions = false;
203                } else {
204                    gatheredSuggestions = EMPTY_STRING_ARRAY;
205                    final double normalizedScore =
206                            Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
207                    hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
208                }
209            } else {
210                if (DBG) {
211                    if (mLength != mSuggestions.size()) {
212                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
213                    }
214                    for (int i = mLength - 1; i >= 0; --i) {
215                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
216                    }
217                }
218                Collections.reverse(mSuggestions);
219                Utils.removeDupes(mSuggestions);
220                if (CAPITALIZE_ALL == capitalizeType) {
221                    for (int i = 0; i < mSuggestions.size(); ++i) {
222                        // get(i) returns a CharSequence which is actually a String so .toString()
223                        // should return the same object.
224                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
225                    }
226                } else if (CAPITALIZE_FIRST == capitalizeType) {
227                    for (int i = 0; i < mSuggestions.size(); ++i) {
228                        // Likewise
229                        mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
230                                locale));
231                    }
232                }
233                // This returns a String[], while toArray() returns an Object[] which cannot be cast
234                // into a String[].
235                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
236
237                final int bestScore = mScores[mLength - 1];
238                final CharSequence bestSuggestion = mSuggestions.get(0);
239                final double normalizedScore =
240                        Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
241                hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
242                if (DBG) {
243                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
244                    Log.i(TAG, "Normalized score = " + normalizedScore
245                            + " (threshold " + mLikelyThreshold
246                            + ") => hasLikelySuggestions = " + hasLikelySuggestions);
247                }
248            }
249            return new Result(gatheredSuggestions, hasLikelySuggestions);
250        }
251    }
252
253    @Override
254    public boolean onUnbind(final Intent intent) {
255        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
256        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
257        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
258        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
259        final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries;
260        mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
261        for (DictionaryPool pool : oldPools.values()) {
262            pool.close();
263        }
264        for (Dictionary dict : oldUserDictionaries.values()) {
265            dict.close();
266        }
267        for (Dictionary dict : oldWhitelistDictionaries.values()) {
268            dict.close();
269        }
270        return false;
271    }
272
273    private DictionaryPool getDictionaryPool(final String locale) {
274        DictionaryPool pool = mDictionaryPools.get(locale);
275        if (null == pool) {
276            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
277            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
278            mDictionaryPools.put(locale, pool);
279        }
280        return pool;
281    }
282
283    public DictAndProximity createDictAndProximity(final Locale locale) {
284        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
285        final Resources resources = getResources();
286        final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
287        final DictionaryCollection dictionaryCollection =
288                DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId,
289                        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY);
290        final String localeStr = locale.toString();
291        Dictionary userDictionary = mUserDictionaries.get(localeStr);
292        if (null == userDictionary) {
293            userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
294            mUserDictionaries.put(localeStr, userDictionary);
295        }
296        dictionaryCollection.addDictionary(userDictionary);
297        Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr);
298        if (null == whitelistDictionary) {
299            whitelistDictionary = new WhitelistDictionary(this, locale);
300            mWhitelistDictionaries.put(localeStr, whitelistDictionary);
301        }
302        dictionaryCollection.addDictionary(whitelistDictionary);
303        return new DictAndProximity(dictionaryCollection, proximityInfo);
304    }
305
306    // This method assumes the text is not empty or null.
307    private static int getCapitalizationType(String text) {
308        // If the first char is not uppercase, then the word is either all lower case,
309        // and in either case we return CAPITALIZE_NONE.
310        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
311        final int len = text.codePointCount(0, text.length());
312        int capsCount = 1;
313        for (int i = 1; i < len; ++i) {
314            if (1 != capsCount && i != capsCount) break;
315            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
316        }
317        // We know the first char is upper case. So we want to test if either everything
318        // else is lower case, or if everything else is upper case. If the string is
319        // exactly one char long, then we will arrive here with capsCount 1, and this is
320        // correct, too.
321        if (1 == capsCount) return CAPITALIZE_FIRST;
322        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
323    }
324
325    private static class AndroidSpellCheckerSession extends Session {
326        // Immutable, but need the locale which is not available in the constructor yet
327        private DictionaryPool mDictionaryPool;
328        // Likewise
329        private Locale mLocale;
330
331        private final AndroidSpellCheckerService mService;
332
333        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
334            mService = service;
335        }
336
337        @Override
338        public void onCreate() {
339            final String localeString = getLocale();
340            mDictionaryPool = mService.getDictionaryPool(localeString);
341            mLocale = LocaleUtils.constructLocaleFromString(localeString);
342        }
343
344        /**
345         * Finds out whether a particular string should be filtered out of spell checking.
346         *
347         * This will loosely match URLs, numbers, symbols.
348         *
349         * @param text the string to evaluate.
350         * @return true if we should filter this text out, false otherwise
351         */
352        private boolean shouldFilterOut(final String text) {
353            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
354
355            // TODO: check if an equivalent processing can't be done more quickly with a
356            // compiled regexp.
357            // Filter by first letter
358            final int firstCodePoint = text.codePointAt(0);
359            // Filter out words that don't start with a letter or an apostrophe
360            if (!Character.isLetter(firstCodePoint)
361                    && '\'' != firstCodePoint) return true;
362
363            // Filter contents
364            final int length = text.length();
365            int letterCount = 0;
366            for (int i = 0; i < length; ++i) {
367                final int codePoint = text.codePointAt(i);
368                // Any word containing a '@' is probably an e-mail address
369                // Any word containing a '/' is probably either an ad-hoc combination of two
370                // words or a URI - in either case we don't want to spell check that
371                if ('@' == codePoint
372                        || '/' == codePoint) return true;
373                if (Character.isLetter(codePoint)) ++letterCount;
374            }
375            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
376            // in this word are letters
377            return (letterCount * 4 < length * 3);
378        }
379
380        // Note : this must be reentrant
381        /**
382         * Gets a list of suggestions for a specific string. This returns a list of possible
383         * corrections for the text passed as an argument. It may split or group words, and
384         * even perform grammatical analysis.
385         */
386        @Override
387        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
388                final int suggestionsLimit) {
389            try {
390                final String text = textInfo.getText();
391
392                if (shouldFilterOut(text)) {
393                    DictAndProximity dictInfo = null;
394                    try {
395                        dictInfo = mDictionaryPool.takeOrGetNull();
396                        if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
397                        return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS
398                                : NOT_IN_DICT_EMPTY_SUGGESTIONS;
399                    } finally {
400                        if (null != dictInfo) {
401                            if (!mDictionaryPool.offer(dictInfo)) {
402                                Log.e(TAG, "Can't re-insert a dictionary into its pool");
403                            }
404                        }
405                    }
406                }
407
408                // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
409                final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
410                        mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
411                final WordComposer composer = new WordComposer();
412                final int length = text.length();
413                for (int i = 0; i < length; ++i) {
414                    final int character = text.codePointAt(i);
415                    final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
416                    final int[] proximities;
417                    if (-1 == proximityIndex) {
418                        proximities = new int[] { character };
419                    } else {
420                        proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
421                                proximityIndex,
422                                proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
423                    }
424                    composer.add(character, proximities,
425                            WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
426                }
427
428                final int capitalizeType = getCapitalizationType(text);
429                boolean isInDict = true;
430                DictAndProximity dictInfo = null;
431                try {
432                    dictInfo = mDictionaryPool.takeOrGetNull();
433                    if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
434                    dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
435                            dictInfo.mProximityInfo);
436                    isInDict = dictInfo.mDictionary.isValidWord(text);
437                    if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
438                        // We want to test the word again if it's all caps or first caps only.
439                        // If it's fully down, we already tested it, if it's mixed case, we don't
440                        // want to test a lowercase version of it.
441                        isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
442                    }
443                } finally {
444                    if (null != dictInfo) {
445                        if (!mDictionaryPool.offer(dictInfo)) {
446                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
447                        }
448                    }
449                }
450
451                final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
452                        capitalizeType, mLocale);
453
454                if (DBG) {
455                    Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
456                            + suggestionsLimit);
457                    Log.i(TAG, "IsInDict = " + isInDict);
458                    Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
459                    Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
460                    if (null != result.mSuggestions) {
461                        for (String suggestion : result.mSuggestions) {
462                            Log.i(TAG, suggestion);
463                        }
464                    }
465                }
466
467                // TODO: actually use result.mHasLikelySuggestions
468                final int flags =
469                        (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
470                                : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
471                return new SuggestionsInfo(flags, result.mSuggestions);
472            } catch (RuntimeException e) {
473                // Don't kill the keyboard if there is a bug in the spell checker
474                if (DBG) {
475                    throw e;
476                } else {
477                    Log.e(TAG, "Exception while spellcheking: " + e);
478                    return NOT_IN_DICT_EMPTY_SUGGESTIONS;
479                }
480            }
481        }
482    }
483}
484