AndroidSpellCheckerService.java revision e897e4d3422c8d9d8b6f051376cc2ba16e4d5945
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.res.Resources;
21import android.service.textservice.SpellCheckerService;
22import android.service.textservice.SpellCheckerService.Session;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25import android.view.textservice.TextInfo;
26import android.text.TextUtils;
27
28import com.android.inputmethod.compat.ArraysCompatUtils;
29import com.android.inputmethod.keyboard.Key;
30import com.android.inputmethod.keyboard.ProximityInfo;
31import com.android.inputmethod.latin.Dictionary;
32import com.android.inputmethod.latin.Dictionary.DataType;
33import com.android.inputmethod.latin.Dictionary.WordCallback;
34import com.android.inputmethod.latin.DictionaryCollection;
35import com.android.inputmethod.latin.DictionaryFactory;
36import com.android.inputmethod.latin.LocaleUtils;
37import com.android.inputmethod.latin.R;
38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
39import com.android.inputmethod.latin.UserDictionary;
40import com.android.inputmethod.latin.Utils;
41import com.android.inputmethod.latin.WordComposer;
42
43import java.util.ArrayList;
44import java.util.Arrays;
45import java.util.Collections;
46import java.util.Locale;
47import java.util.Map;
48import java.util.TreeMap;
49
50/**
51 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
52 */
53public class AndroidSpellCheckerService extends SpellCheckerService {
54    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
55    private static final boolean DBG = false;
56    private static final int POOL_SIZE = 2;
57
58    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
59    private static final int CAPITALIZE_FIRST = 1; // First only
60    private static final int CAPITALIZE_ALL = 2; // All caps
61
62    private final static String[] EMPTY_STRING_ARRAY = new String[0];
63    private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS =
64            new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
65    private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS =
66            new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
67                    EMPTY_STRING_ARRAY);
68    private Map<String, DictionaryPool> mDictionaryPools =
69            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
70    private Map<String, Dictionary> mUserDictionaries =
71            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
72
73    private double mTypoThreshold;
74
75    @Override public void onCreate() {
76        super.onCreate();
77        mTypoThreshold = Double.parseDouble(getString(R.string.spellchecker_typo_threshold_value));
78    }
79
80    @Override
81    public Session createSession() {
82        return new AndroidSpellCheckerSession(this);
83    }
84
85    private static class SuggestionsGatherer implements WordCallback {
86        public static class Result {
87            public final String[] mSuggestions;
88            public final boolean mLooksLikeTypo;
89            public Result(final String[] gatheredSuggestions, final boolean looksLikeTypo) {
90                mSuggestions = gatheredSuggestions;
91                mLooksLikeTypo = looksLikeTypo;
92            }
93        }
94
95        private final int DEFAULT_SUGGESTION_LENGTH = 16;
96        private final ArrayList<CharSequence> mSuggestions;
97        private final int[] mScores;
98        private final int mMaxLength;
99        private int mLength = 0;
100
101        // The two following attributes are only ever filled if the requested max length
102        // is 0 (or less, which is treated the same).
103        private String mBestSuggestion = null;
104        private int mBestScore = Integer.MIN_VALUE; // As small as possible
105
106        SuggestionsGatherer(final int maxLength) {
107            mMaxLength = maxLength;
108            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
109            mScores = new int[mMaxLength];
110        }
111
112        @Override
113        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
114                int dicTypeId, DataType dataType) {
115            final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
116            // binarySearch returns the index if the element exists, and -<insertion index> - 1
117            // if it doesn't. See documentation for binarySearch.
118            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
119
120            if (mLength < mMaxLength) {
121                final int copyLen = mLength - insertIndex;
122                ++mLength;
123                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
124                mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength));
125            } else {
126                if (insertIndex == 0) {
127                    // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
128                    // then we need to keep track of the best suggestion in mBestScore and
129                    // mBestSuggestion. This is so that we know whether the best suggestion makes
130                    // the score cutoff, since we need to know that to return a meaningful
131                    // looksLikeTypo.
132                    if (0 >= mMaxLength) {
133                        if (score > mBestScore) {
134                            mBestScore = score;
135                            mBestSuggestion = new String(word, wordOffset, wordLength);
136                        }
137                    }
138                    return true;
139                }
140                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
141                mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength));
142                mSuggestions.remove(0);
143            }
144            mScores[insertIndex] = score;
145
146            return true;
147        }
148
149        public Result getResults(final CharSequence originalText, final double threshold,
150                final int capitalizeType, final Locale locale) {
151            final String[] gatheredSuggestions;
152            final boolean looksLikeTypo;
153            if (0 == mLength) {
154                // Either we found no suggestions, or we found some BUT the max length was 0.
155                // If we found some mBestSuggestion will not be null. If it is null, then
156                // we found none, regardless of the max length.
157                if (null == mBestSuggestion) {
158                    gatheredSuggestions = null;
159                    looksLikeTypo = false;
160                } else {
161                    gatheredSuggestions = EMPTY_STRING_ARRAY;
162                    final double normalizedScore =
163                            Utils.calcNormalizedScore(originalText, mBestSuggestion, mBestScore);
164                    looksLikeTypo = (normalizedScore > threshold);
165                }
166            } else {
167                if (DBG) {
168                    if (mLength != mSuggestions.size()) {
169                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
170                    }
171                    for (int i = mLength - 1; i >= 0; --i) {
172                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
173                    }
174                }
175                Collections.reverse(mSuggestions);
176                Utils.removeDupes(mSuggestions);
177                if (CAPITALIZE_ALL == capitalizeType) {
178                    for (int i = 0; i < mSuggestions.size(); ++i) {
179                        // get(i) returns a CharSequence which is actually a String so .toString()
180                        // should return the same object.
181                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
182                    }
183                } else if (CAPITALIZE_FIRST == capitalizeType) {
184                    for (int i = 0; i < mSuggestions.size(); ++i) {
185                        // Likewise
186                        mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
187                                locale));
188                    }
189                }
190                // This returns a String[], while toArray() returns an Object[] which cannot be cast
191                // into a String[].
192                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
193
194                final int bestScore = mScores[mLength - 1];
195                final CharSequence bestSuggestion = mSuggestions.get(0);
196                final double normalizedScore =
197                        Utils.calcNormalizedScore(originalText, bestSuggestion, bestScore);
198                looksLikeTypo = (normalizedScore > threshold);
199                if (DBG) {
200                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
201                    Log.i(TAG, "Normalized score = " + normalizedScore + " (threshold " + threshold
202                            + ") => looksLikeTypo = " + looksLikeTypo);
203                }
204            }
205            return new Result(gatheredSuggestions, looksLikeTypo);
206        }
207    }
208
209    @Override
210    public boolean onUnbind(final Intent intent) {
211        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
212        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
213        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
214        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
215        for (DictionaryPool pool : oldPools.values()) {
216            pool.close();
217        }
218        for (Dictionary dict : oldUserDictionaries.values()) {
219            dict.close();
220        }
221        return false;
222    }
223
224    private DictionaryPool getDictionaryPool(final String locale) {
225        DictionaryPool pool = mDictionaryPools.get(locale);
226        if (null == pool) {
227            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
228            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
229            mDictionaryPools.put(locale, pool);
230        }
231        return pool;
232    }
233
234    public DictAndProximity createDictAndProximity(final Locale locale) {
235        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
236        final Resources resources = getResources();
237        final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
238        final DictionaryCollection dictionaryCollection =
239                DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId);
240        final String localeStr = locale.toString();
241        Dictionary userDict = mUserDictionaries.get(localeStr);
242        if (null == userDict) {
243            userDict = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
244            mUserDictionaries.put(localeStr, userDict);
245        }
246        dictionaryCollection.addDictionary(userDict);
247        return new DictAndProximity(dictionaryCollection, proximityInfo);
248    }
249
250    // This method assumes the text is not empty or null.
251    private static int getCapitalizationType(String text) {
252        // If the first char is not uppercase, then the word is either all lower case,
253        // and in either case we return CAPITALIZE_NONE.
254        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
255        final int len = text.codePointCount(0, text.length());
256        int capsCount = 1;
257        for (int i = 1; i < len; ++i) {
258            if (1 != capsCount && i != capsCount) break;
259            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
260        }
261        // We know the first char is upper case. So we want to test if either everything
262        // else is lower case, or if everything else is upper case. If the string is
263        // exactly one char long, then we will arrive here with capsCount 1, and this is
264        // correct, too.
265        if (1 == capsCount) return CAPITALIZE_FIRST;
266        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
267    }
268
269    private static class AndroidSpellCheckerSession extends Session {
270        // Immutable, but need the locale which is not available in the constructor yet
271        private DictionaryPool mDictionaryPool;
272        // Likewise
273        private Locale mLocale;
274
275        private final AndroidSpellCheckerService mService;
276
277        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
278            mService = service;
279        }
280
281        @Override
282        public void onCreate() {
283            final String localeString = getLocale();
284            mDictionaryPool = mService.getDictionaryPool(localeString);
285            mLocale = LocaleUtils.constructLocaleFromString(localeString);
286        }
287
288        /**
289         * Finds out whether a particular string should be filtered out of spell checking.
290         *
291         * This will loosely match URLs, numbers, symbols.
292         *
293         * @param text the string to evaluate.
294         * @return true if we should filter this text out, false otherwise
295         */
296        private boolean shouldFilterOut(final String text) {
297            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
298
299            // TODO: check if an equivalent processing can't be done more quickly with a
300            // compiled regexp.
301            // Filter by first letter
302            final int firstCodePoint = text.codePointAt(0);
303            // Filter out words that don't start with a letter or an apostrophe
304            if (!Character.isLetter(firstCodePoint)
305                    && '\'' != firstCodePoint) return true;
306
307            // Filter contents
308            final int length = text.length();
309            int letterCount = 0;
310            for (int i = 0; i < length; ++i) {
311                final int codePoint = text.codePointAt(i);
312                // Any word containing a '@' is probably an e-mail address
313                // Any word containing a '/' is probably either an ad-hoc combination of two
314                // words or a URI - in either case we don't want to spell check that
315                if ('@' == codePoint
316                        || '/' == codePoint) return true;
317                if (Character.isLetter(codePoint)) ++letterCount;
318            }
319            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
320            // in this word are letters
321            return (letterCount * 4 < length * 3);
322        }
323
324        // Note : this must be reentrant
325        /**
326         * Gets a list of suggestions for a specific string. This returns a list of possible
327         * corrections for the text passed as an argument. It may split or group words, and
328         * even perform grammatical analysis.
329         */
330        @Override
331        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
332                final int suggestionsLimit) {
333            try {
334                final String text = textInfo.getText();
335
336                if (shouldFilterOut(text)) {
337                    final DictAndProximity dictInfo = mDictionaryPool.takeOrGetNull();
338                    if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
339                    return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS
340                            : NOT_IN_DICT_EMPTY_SUGGESTIONS;
341                }
342
343                final SuggestionsGatherer suggestionsGatherer =
344                        new SuggestionsGatherer(suggestionsLimit);
345                final WordComposer composer = new WordComposer();
346                final int length = text.length();
347                for (int i = 0; i < length; ++i) {
348                    final int character = text.codePointAt(i);
349                    final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
350                    final int[] proximities;
351                    if (-1 == proximityIndex) {
352                        proximities = new int[] { character };
353                    } else {
354                        proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
355                                proximityIndex,
356                                proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
357                    }
358                    composer.add(character, proximities,
359                            WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
360                }
361
362                final int capitalizeType = getCapitalizationType(text);
363                boolean isInDict = true;
364                final DictAndProximity dictInfo = mDictionaryPool.takeOrGetNull();
365                if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
366                dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
367                        dictInfo.mProximityInfo);
368                isInDict = dictInfo.mDictionary.isValidWord(text);
369                if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
370                    // We want to test the word again if it's all caps or first caps only.
371                    // If it's fully down, we already tested it, if it's mixed case, we don't
372                    // want to test a lowercase version of it.
373                    isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
374                }
375                if (!mDictionaryPool.offer(dictInfo)) {
376                    Log.e(TAG, "Can't re-insert a dictionary into its pool");
377                }
378
379                final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(text,
380                        mService.mTypoThreshold, capitalizeType, mLocale);
381
382                if (DBG) {
383                    Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
384                            + suggestionsLimit);
385                    Log.i(TAG, "IsInDict = " + result.mLooksLikeTypo);
386                    Log.i(TAG, "LooksLikeTypo = " + result.mLooksLikeTypo);
387                    for (String suggestion : result.mSuggestions) {
388                        Log.i(TAG, suggestion);
389                    }
390                }
391
392                final int flags =
393                        (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : 0)
394                                | (result.mLooksLikeTypo
395                                        ? SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO : 0);
396                return new SuggestionsInfo(flags, result.mSuggestions);
397            } catch (RuntimeException e) {
398                // Don't kill the keyboard if there is a bug in the spell checker
399                if (DBG) {
400                    throw e;
401                } else {
402                    Log.e(TAG, "Exception while spellcheking: " + e);
403                    return NOT_IN_DICT_EMPTY_SUGGESTIONS;
404                }
405            }
406        }
407    }
408}
409