AndroidSpellCheckerService.java revision 51075d145a85d1acaff08c02f4d6b10b175eaa36
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.res.Resources;
21import android.service.textservice.SpellCheckerService;
22import android.text.TextUtils;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25import android.view.textservice.TextInfo;
26
27import com.android.inputmethod.compat.ArraysCompatUtils;
28import com.android.inputmethod.keyboard.ProximityInfo;
29import com.android.inputmethod.latin.BinaryDictionary;
30import com.android.inputmethod.latin.Dictionary;
31import com.android.inputmethod.latin.Dictionary.DataType;
32import com.android.inputmethod.latin.Dictionary.WordCallback;
33import com.android.inputmethod.latin.DictionaryCollection;
34import com.android.inputmethod.latin.DictionaryFactory;
35import com.android.inputmethod.latin.Flag;
36import com.android.inputmethod.latin.LocaleUtils;
37import com.android.inputmethod.latin.R;
38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
39import com.android.inputmethod.latin.Utils;
40import com.android.inputmethod.latin.WordComposer;
41
42import java.util.ArrayList;
43import java.util.Arrays;
44import java.util.Collections;
45import java.util.Locale;
46import java.util.Map;
47import java.util.TreeMap;
48
49/**
50 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
51 */
52public class AndroidSpellCheckerService extends SpellCheckerService {
53    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
54    private static final boolean DBG = false;
55    private static final int POOL_SIZE = 2;
56
57    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
58    private static final int CAPITALIZE_FIRST = 1; // First only
59    private static final int CAPITALIZE_ALL = 2; // All caps
60
61    private final static String[] EMPTY_STRING_ARRAY = new String[0];
62    private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS =
63            new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
64    private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS =
65            new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
66                    EMPTY_STRING_ARRAY);
67    private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY;
68    static {
69        // See BinaryDictionary.java for an explanation of these flags
70        // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the
71        // current dictionary configuration - for example, consider the UMLAUT flag
72        // so that it will be turned on for German dictionaries and off for others.
73        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS,
74                BinaryDictionary.ALL_CONFIG_FLAGS.length + 1);
75        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] =
76                BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE;
77    }
78    private Map<String, DictionaryPool> mDictionaryPools =
79            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
80    private Map<String, Dictionary> mUserDictionaries =
81            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
82
83    // The threshold for a candidate to be offered as a suggestion.
84    private double mSuggestionThreshold;
85    // The threshold for a suggestion to be considered "likely".
86    private double mLikelyThreshold;
87
88    @Override public void onCreate() {
89        super.onCreate();
90        mSuggestionThreshold =
91                Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
92        mLikelyThreshold =
93                Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
94    }
95
96    @Override
97    public Session createSession() {
98        return new AndroidSpellCheckerSession(this);
99    }
100
101    private static class SuggestionsGatherer implements WordCallback {
102        public static class Result {
103            public final String[] mSuggestions;
104            public final boolean mHasLikelySuggestions;
105            public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
106                mSuggestions = gatheredSuggestions;
107                mHasLikelySuggestions = hasLikelySuggestions;
108            }
109        }
110
111        private final ArrayList<CharSequence> mSuggestions;
112        private final int[] mScores;
113        private final String mOriginalText;
114        private final double mSuggestionThreshold;
115        private final double mLikelyThreshold;
116        private final int mMaxLength;
117        private int mLength = 0;
118
119        // The two following attributes are only ever filled if the requested max length
120        // is 0 (or less, which is treated the same).
121        private String mBestSuggestion = null;
122        private int mBestScore = Integer.MIN_VALUE; // As small as possible
123
124        SuggestionsGatherer(final String originalText, final double suggestionThreshold,
125                final double likelyThreshold, final int maxLength) {
126            mOriginalText = originalText;
127            mSuggestionThreshold = suggestionThreshold;
128            mLikelyThreshold = likelyThreshold;
129            mMaxLength = maxLength;
130            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
131            mScores = new int[mMaxLength];
132        }
133
134        @Override
135        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
136                int dicTypeId, DataType dataType) {
137            final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
138            // binarySearch returns the index if the element exists, and -<insertion index> - 1
139            // if it doesn't. See documentation for binarySearch.
140            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
141
142            if (insertIndex == 0 && mLength >= mMaxLength) {
143                // In the future, we may want to keep track of the best suggestion score even if
144                // we are asked for 0 suggestions. In this case, we can use the following
145                // (tested) code to keep it:
146                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
147                // then we need to keep track of the best suggestion in mBestScore and
148                // mBestSuggestion. This is so that we know whether the best suggestion makes
149                // the score cutoff, since we need to know that to return a meaningful
150                // looksLikeTypo.
151                // if (0 >= mMaxLength) {
152                //     if (score > mBestScore) {
153                //         mBestScore = score;
154                //         mBestSuggestion = new String(word, wordOffset, wordLength);
155                //     }
156                // }
157                return true;
158            }
159
160            // Compute the normalized score and skip this word if it's normalized score does not
161            // make the threshold.
162            final String wordString = new String(word, wordOffset, wordLength);
163            final double normalizedScore =
164                    Utils.calcNormalizedScore(mOriginalText, wordString, score);
165            if (normalizedScore < mSuggestionThreshold) {
166                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
167                return true;
168            }
169
170            if (mLength < mMaxLength) {
171                final int copyLen = mLength - insertIndex;
172                ++mLength;
173                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
174                mSuggestions.add(insertIndex, wordString);
175            } else {
176                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
177                mSuggestions.add(insertIndex, wordString);
178                mSuggestions.remove(0);
179            }
180            mScores[insertIndex] = score;
181
182            return true;
183        }
184
185        public Result getResults(final int capitalizeType, final Locale locale) {
186            final String[] gatheredSuggestions;
187            final boolean hasLikelySuggestions;
188            if (0 == mLength) {
189                // Either we found no suggestions, or we found some BUT the max length was 0.
190                // If we found some mBestSuggestion will not be null. If it is null, then
191                // we found none, regardless of the max length.
192                if (null == mBestSuggestion) {
193                    gatheredSuggestions = null;
194                    hasLikelySuggestions = false;
195                } else {
196                    gatheredSuggestions = EMPTY_STRING_ARRAY;
197                    final double normalizedScore =
198                            Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
199                    hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
200                }
201            } else {
202                if (DBG) {
203                    if (mLength != mSuggestions.size()) {
204                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
205                    }
206                    for (int i = mLength - 1; i >= 0; --i) {
207                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
208                    }
209                }
210                Collections.reverse(mSuggestions);
211                Utils.removeDupes(mSuggestions);
212                if (CAPITALIZE_ALL == capitalizeType) {
213                    for (int i = 0; i < mSuggestions.size(); ++i) {
214                        // get(i) returns a CharSequence which is actually a String so .toString()
215                        // should return the same object.
216                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
217                    }
218                } else if (CAPITALIZE_FIRST == capitalizeType) {
219                    for (int i = 0; i < mSuggestions.size(); ++i) {
220                        // Likewise
221                        mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
222                                locale));
223                    }
224                }
225                // This returns a String[], while toArray() returns an Object[] which cannot be cast
226                // into a String[].
227                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
228
229                final int bestScore = mScores[mLength - 1];
230                final CharSequence bestSuggestion = mSuggestions.get(0);
231                final double normalizedScore =
232                        Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
233                hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
234                if (DBG) {
235                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
236                    Log.i(TAG, "Normalized score = " + normalizedScore
237                            + " (threshold " + mLikelyThreshold
238                            + ") => hasLikelySuggestions = " + hasLikelySuggestions);
239                }
240            }
241            return new Result(gatheredSuggestions, hasLikelySuggestions);
242        }
243    }
244
245    @Override
246    public boolean onUnbind(final Intent intent) {
247        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
248        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
249        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
250        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
251        for (DictionaryPool pool : oldPools.values()) {
252            pool.close();
253        }
254        for (Dictionary dict : oldUserDictionaries.values()) {
255            dict.close();
256        }
257        return false;
258    }
259
260    private DictionaryPool getDictionaryPool(final String locale) {
261        DictionaryPool pool = mDictionaryPools.get(locale);
262        if (null == pool) {
263            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
264            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
265            mDictionaryPools.put(locale, pool);
266        }
267        return pool;
268    }
269
270    public DictAndProximity createDictAndProximity(final Locale locale) {
271        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
272        final Resources resources = getResources();
273        final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
274        final DictionaryCollection dictionaryCollection =
275                DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId,
276                        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY);
277        final String localeStr = locale.toString();
278        Dictionary userDict = mUserDictionaries.get(localeStr);
279        if (null == userDict) {
280            userDict = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
281            mUserDictionaries.put(localeStr, userDict);
282        }
283        dictionaryCollection.addDictionary(userDict);
284        return new DictAndProximity(dictionaryCollection, proximityInfo);
285    }
286
287    // This method assumes the text is not empty or null.
288    private static int getCapitalizationType(String text) {
289        // If the first char is not uppercase, then the word is either all lower case,
290        // and in either case we return CAPITALIZE_NONE.
291        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
292        final int len = text.codePointCount(0, text.length());
293        int capsCount = 1;
294        for (int i = 1; i < len; ++i) {
295            if (1 != capsCount && i != capsCount) break;
296            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
297        }
298        // We know the first char is upper case. So we want to test if either everything
299        // else is lower case, or if everything else is upper case. If the string is
300        // exactly one char long, then we will arrive here with capsCount 1, and this is
301        // correct, too.
302        if (1 == capsCount) return CAPITALIZE_FIRST;
303        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
304    }
305
306    private static class AndroidSpellCheckerSession extends Session {
307        // Immutable, but need the locale which is not available in the constructor yet
308        private DictionaryPool mDictionaryPool;
309        // Likewise
310        private Locale mLocale;
311
312        private final AndroidSpellCheckerService mService;
313
314        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
315            mService = service;
316        }
317
318        @Override
319        public void onCreate() {
320            final String localeString = getLocale();
321            mDictionaryPool = mService.getDictionaryPool(localeString);
322            mLocale = LocaleUtils.constructLocaleFromString(localeString);
323        }
324
325        /**
326         * Finds out whether a particular string should be filtered out of spell checking.
327         *
328         * This will loosely match URLs, numbers, symbols.
329         *
330         * @param text the string to evaluate.
331         * @return true if we should filter this text out, false otherwise
332         */
333        private boolean shouldFilterOut(final String text) {
334            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
335
336            // TODO: check if an equivalent processing can't be done more quickly with a
337            // compiled regexp.
338            // Filter by first letter
339            final int firstCodePoint = text.codePointAt(0);
340            // Filter out words that don't start with a letter or an apostrophe
341            if (!Character.isLetter(firstCodePoint)
342                    && '\'' != firstCodePoint) return true;
343
344            // Filter contents
345            final int length = text.length();
346            int letterCount = 0;
347            for (int i = 0; i < length; ++i) {
348                final int codePoint = text.codePointAt(i);
349                // Any word containing a '@' is probably an e-mail address
350                // Any word containing a '/' is probably either an ad-hoc combination of two
351                // words or a URI - in either case we don't want to spell check that
352                if ('@' == codePoint
353                        || '/' == codePoint) return true;
354                if (Character.isLetter(codePoint)) ++letterCount;
355            }
356            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
357            // in this word are letters
358            return (letterCount * 4 < length * 3);
359        }
360
361        // Note : this must be reentrant
362        /**
363         * Gets a list of suggestions for a specific string. This returns a list of possible
364         * corrections for the text passed as an argument. It may split or group words, and
365         * even perform grammatical analysis.
366         */
367        @Override
368        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
369                final int suggestionsLimit) {
370            try {
371                final String text = textInfo.getText();
372
373                if (shouldFilterOut(text)) {
374                    DictAndProximity dictInfo = null;
375                    try {
376                        dictInfo = mDictionaryPool.takeOrGetNull();
377                        if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
378                        return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS
379                                : NOT_IN_DICT_EMPTY_SUGGESTIONS;
380                    } finally {
381                        if (null != dictInfo) {
382                            if (!mDictionaryPool.offer(dictInfo)) {
383                                Log.e(TAG, "Can't re-insert a dictionary into its pool");
384                            }
385                        }
386                    }
387                }
388
389                // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
390                final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
391                        mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
392                final WordComposer composer = new WordComposer();
393                final int length = text.length();
394                for (int i = 0; i < length; ++i) {
395                    final int character = text.codePointAt(i);
396                    final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
397                    final int[] proximities;
398                    if (-1 == proximityIndex) {
399                        proximities = new int[] { character };
400                    } else {
401                        proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
402                                proximityIndex,
403                                proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
404                    }
405                    composer.add(character, proximities,
406                            WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
407                }
408
409                final int capitalizeType = getCapitalizationType(text);
410                boolean isInDict = true;
411                DictAndProximity dictInfo = null;
412                try {
413                    dictInfo = mDictionaryPool.takeOrGetNull();
414                    if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
415                    dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
416                            dictInfo.mProximityInfo);
417                    isInDict = dictInfo.mDictionary.isValidWord(text);
418                    if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
419                        // We want to test the word again if it's all caps or first caps only.
420                        // If it's fully down, we already tested it, if it's mixed case, we don't
421                        // want to test a lowercase version of it.
422                        isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
423                    }
424                } finally {
425                    if (null != dictInfo) {
426                        if (!mDictionaryPool.offer(dictInfo)) {
427                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
428                        }
429                    }
430                }
431
432                final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
433                        capitalizeType, mLocale);
434
435                if (DBG) {
436                    Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
437                            + suggestionsLimit);
438                    Log.i(TAG, "IsInDict = " + isInDict);
439                    Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
440                    Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
441                    if (null != result.mSuggestions) {
442                        for (String suggestion : result.mSuggestions) {
443                            Log.i(TAG, suggestion);
444                        }
445                    }
446                }
447
448                // TODO: actually use result.mHasLikelySuggestions
449                final int flags =
450                        (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
451                                : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
452                return new SuggestionsInfo(flags, result.mSuggestions);
453            } catch (RuntimeException e) {
454                // Don't kill the keyboard if there is a bug in the spell checker
455                if (DBG) {
456                    throw e;
457                } else {
458                    Log.e(TAG, "Exception while spellcheking: " + e);
459                    return NOT_IN_DICT_EMPTY_SUGGESTIONS;
460                }
461            }
462        }
463    }
464}
465