AndroidSpellCheckerService.java revision 88fa53b840686bb428b932eed7dd38162ae902c2
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.res.Resources;
21import android.service.textservice.SpellCheckerService;
22import android.service.textservice.SpellCheckerService.Session;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25import android.view.textservice.TextInfo;
26import android.text.TextUtils;
27
28import com.android.inputmethod.compat.ArraysCompatUtils;
29import com.android.inputmethod.keyboard.Key;
30import com.android.inputmethod.keyboard.ProximityInfo;
31import com.android.inputmethod.latin.Dictionary;
32import com.android.inputmethod.latin.Dictionary.DataType;
33import com.android.inputmethod.latin.Dictionary.WordCallback;
34import com.android.inputmethod.latin.DictionaryCollection;
35import com.android.inputmethod.latin.DictionaryFactory;
36import com.android.inputmethod.latin.LocaleUtils;
37import com.android.inputmethod.latin.R;
38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
39import com.android.inputmethod.latin.UserDictionary;
40import com.android.inputmethod.latin.Utils;
41import com.android.inputmethod.latin.WordComposer;
42
43import java.util.ArrayList;
44import java.util.Arrays;
45import java.util.Collections;
46import java.util.Locale;
47import java.util.Map;
48import java.util.TreeMap;
49
50/**
51 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
52 */
53public class AndroidSpellCheckerService extends SpellCheckerService {
54    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
55    private static final boolean DBG = false;
56    private static final int POOL_SIZE = 2;
57
58    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
59    private static final int CAPITALIZE_FIRST = 1; // First only
60    private static final int CAPITALIZE_ALL = 2; // All caps
61
62    private final static String[] EMPTY_STRING_ARRAY = new String[0];
63    private final static SuggestionsInfo EMPTY_SUGGESTIONS_INFO =
64            new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
65    private Map<String, DictionaryPool> mDictionaryPools =
66            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
67    private Map<String, Dictionary> mUserDictionaries =
68            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
69
70    private double mTypoThreshold;
71
72    @Override public void onCreate() {
73        super.onCreate();
74        mTypoThreshold = Double.parseDouble(getString(R.string.spellchecker_typo_threshold_value));
75    }
76
77    @Override
78    public Session createSession() {
79        return new AndroidSpellCheckerSession(this);
80    }
81
82    private static class SuggestionsGatherer implements WordCallback {
83        public static class Result {
84            public final String[] mSuggestions;
85            public final boolean mLooksLikeTypo;
86            public Result(final String[] gatheredSuggestions, final boolean looksLikeTypo) {
87                mSuggestions = gatheredSuggestions;
88                mLooksLikeTypo = looksLikeTypo;
89            }
90        }
91
92        private final int DEFAULT_SUGGESTION_LENGTH = 16;
93        private final ArrayList<CharSequence> mSuggestions;
94        private final int[] mScores;
95        private final int mMaxLength;
96        private int mLength = 0;
97
98        // The two following attributes are only ever filled if the requested max length
99        // is 0 (or less, which is treated the same).
100        private String mBestSuggestion = null;
101        private int mBestScore = Integer.MIN_VALUE; // As small as possible
102
103        SuggestionsGatherer(final int maxLength) {
104            mMaxLength = maxLength;
105            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
106            mScores = new int[mMaxLength];
107        }
108
109        @Override
110        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
111                int dicTypeId, DataType dataType) {
112            final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
113            // binarySearch returns the index if the element exists, and -<insertion index> - 1
114            // if it doesn't. See documentation for binarySearch.
115            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
116
117            if (mLength < mMaxLength) {
118                final int copyLen = mLength - insertIndex;
119                ++mLength;
120                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
121                mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength));
122            } else {
123                if (insertIndex == 0) {
124                    // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
125                    // then we need to keep track of the best suggestion in mBestScore and
126                    // mBestSuggestion. This is so that we know whether the best suggestion makes
127                    // the score cutoff, since we need to know that to return a meaningful
128                    // looksLikeTypo.
129                    if (0 >= mMaxLength) {
130                        if (score > mBestScore) {
131                            mBestScore = score;
132                            mBestSuggestion = new String(word, wordOffset, wordLength);
133                        }
134                    }
135                    return true;
136                }
137                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
138                mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength));
139                mSuggestions.remove(0);
140            }
141            mScores[insertIndex] = score;
142
143            return true;
144        }
145
146        public Result getResults(final CharSequence originalText, final double threshold,
147                final int capitalizeType, final Locale locale) {
148            final String[] gatheredSuggestions;
149            final boolean looksLikeTypo;
150            if (0 == mLength) {
151                // Either we found no suggestions, or we found some BUT the max length was 0.
152                // If we found some mBestSuggestion will not be null. If it is null, then
153                // we found none, regardless of the max length.
154                if (null == mBestSuggestion) {
155                    gatheredSuggestions = null;
156                    looksLikeTypo = false;
157                } else {
158                    gatheredSuggestions = EMPTY_STRING_ARRAY;
159                    final double normalizedScore =
160                            Utils.calcNormalizedScore(originalText, mBestSuggestion, mBestScore);
161                    looksLikeTypo = (normalizedScore > threshold);
162                }
163            } else {
164                if (DBG) {
165                    if (mLength != mSuggestions.size()) {
166                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
167                    }
168                    for (int i = mLength - 1; i >= 0; --i) {
169                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
170                    }
171                }
172                Collections.reverse(mSuggestions);
173                Utils.removeDupes(mSuggestions);
174                if (CAPITALIZE_ALL == capitalizeType) {
175                    for (int i = 0; i < mSuggestions.size(); ++i) {
176                        // get(i) returns a CharSequence which is actually a String so .toString()
177                        // should return the same object.
178                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
179                    }
180                } else if (CAPITALIZE_FIRST == capitalizeType) {
181                    for (int i = 0; i < mSuggestions.size(); ++i) {
182                        // Likewise
183                        mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
184                                locale));
185                    }
186                }
187                // This returns a String[], while toArray() returns an Object[] which cannot be cast
188                // into a String[].
189                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
190
191                final int bestScore = mScores[mLength - 1];
192                final CharSequence bestSuggestion = mSuggestions.get(0);
193                final double normalizedScore =
194                        Utils.calcNormalizedScore(originalText, bestSuggestion, bestScore);
195                looksLikeTypo = (normalizedScore > threshold);
196                if (DBG) {
197                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
198                    Log.i(TAG, "Normalized score = " + normalizedScore + " (threshold " + threshold
199                            + ") => looksLikeTypo = " + looksLikeTypo);
200                }
201            }
202            return new Result(gatheredSuggestions, looksLikeTypo);
203        }
204    }
205
206    @Override
207    public boolean onUnbind(final Intent intent) {
208        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
209        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
210        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
211        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
212        for (DictionaryPool pool : oldPools.values()) {
213            pool.close();
214        }
215        for (Dictionary dict : oldUserDictionaries.values()) {
216            dict.close();
217        }
218        return false;
219    }
220
221    private DictionaryPool getDictionaryPool(final String locale) {
222        DictionaryPool pool = mDictionaryPools.get(locale);
223        if (null == pool) {
224            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
225            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
226            mDictionaryPools.put(locale, pool);
227        }
228        return pool;
229    }
230
231    public DictAndProximity createDictAndProximity(final Locale locale) {
232        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
233        final Resources resources = getResources();
234        final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
235        final DictionaryCollection dictionaryCollection =
236                DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId);
237        final String localeStr = locale.toString();
238        Dictionary userDict = mUserDictionaries.get(localeStr);
239        if (null == userDict) {
240            userDict = new SynchronouslyLoadedUserDictionary(this, localeStr);
241            mUserDictionaries.put(localeStr, userDict);
242        }
243        dictionaryCollection.addDictionary(userDict);
244        return new DictAndProximity(dictionaryCollection, proximityInfo);
245    }
246
247    // This method assumes the text is not empty or null.
248    private static int getCapitalizationType(String text) {
249        // If the first char is not uppercase, then the word is either all lower case,
250        // and in either case we return CAPITALIZE_NONE.
251        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
252        final int len = text.codePointCount(0, text.length());
253        int capsCount = 1;
254        for (int i = 1; i < len; ++i) {
255            if (1 != capsCount && i != capsCount) break;
256            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
257        }
258        // We know the first char is upper case. So we want to test if either everything
259        // else is lower case, or if everything else is upper case. If the string is
260        // exactly one char long, then we will arrive here with capsCount 1, and this is
261        // correct, too.
262        if (1 == capsCount) return CAPITALIZE_FIRST;
263        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
264    }
265
266    private static class AndroidSpellCheckerSession extends Session {
267        // Immutable, but need the locale which is not available in the constructor yet
268        private DictionaryPool mDictionaryPool;
269        // Likewise
270        private Locale mLocale;
271
272        private final AndroidSpellCheckerService mService;
273
274        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
275            mService = service;
276        }
277
278        @Override
279        public void onCreate() {
280            final String localeString = getLocale();
281            mDictionaryPool = mService.getDictionaryPool(localeString);
282            mLocale = LocaleUtils.constructLocaleFromString(localeString);
283        }
284
285        /**
286         * Finds out whether a particular string should be filtered out of spell checking.
287         *
288         * This will loosely match URLs, numbers, symbols.
289         *
290         * @param text the string to evaluate.
291         * @return true if we should filter this text out, false otherwise
292         */
293        private boolean shouldFilterOut(final String text) {
294            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
295
296            // TODO: check if an equivalent processing can't be done more quickly with a
297            // compiled regexp.
298            // Filter by first letter
299            final int firstCodePoint = text.codePointAt(0);
300            // Filter out words that don't start with a letter or an apostrophe
301            if (!Character.isLetter(firstCodePoint)
302                    && '\'' != firstCodePoint) return true;
303
304            // Filter contents
305            final int length = text.length();
306            int letterCount = 0;
307            for (int i = 0; i < length; ++i) {
308                final int codePoint = text.codePointAt(i);
309                // Any word containing a '@' is probably an e-mail address
310                // Any word containing a '/' is probably either an ad-hoc combination of two
311                // words or a URI - in either case we don't want to spell check that
312                if ('@' == codePoint
313                        || '/' == codePoint) return true;
314                if (Character.isLetter(codePoint)) ++letterCount;
315            }
316            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
317            // in this word are letters
318            return (letterCount * 4 < length * 3);
319        }
320
321        // Note : this must be reentrant
322        /**
323         * Gets a list of suggestions for a specific string. This returns a list of possible
324         * corrections for the text passed as an argument. It may split or group words, and
325         * even perform grammatical analysis.
326         */
327        @Override
328        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
329                final int suggestionsLimit) {
330            final String text = textInfo.getText();
331
332            if (shouldFilterOut(text)) return EMPTY_SUGGESTIONS_INFO;
333
334            final SuggestionsGatherer suggestionsGatherer =
335                    new SuggestionsGatherer(suggestionsLimit);
336            final WordComposer composer = new WordComposer();
337            final int length = text.length();
338            for (int i = 0; i < length; ++i) {
339                final int character = text.codePointAt(i);
340                final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
341                final int[] proximities;
342                if (-1 == proximityIndex) {
343                    proximities = new int[] { character };
344                } else {
345                    proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
346                            proximityIndex, proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
347                }
348                composer.add(character, proximities,
349                        WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
350            }
351
352            final int capitalizeType = getCapitalizationType(text);
353            boolean isInDict = true;
354            try {
355                final DictAndProximity dictInfo = mDictionaryPool.take();
356                dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
357                        dictInfo.mProximityInfo);
358                isInDict = dictInfo.mDictionary.isValidWord(text);
359                if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
360                    // We want to test the word again if it's all caps or first caps only.
361                    // If it's fully down, we already tested it, if it's mixed case, we don't
362                    // want to test a lowercase version of it.
363                    isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
364                }
365                if (!mDictionaryPool.offer(dictInfo)) {
366                    Log.e(TAG, "Can't re-insert a dictionary into its pool");
367                }
368            } catch (InterruptedException e) {
369                // I don't think this can happen.
370                return EMPTY_SUGGESTIONS_INFO;
371            }
372
373            final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(text,
374                    mService.mTypoThreshold, capitalizeType, mLocale);
375
376            if (DBG) {
377                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
378                        + suggestionsLimit);
379                Log.i(TAG, "IsInDict = " + result.mLooksLikeTypo);
380                Log.i(TAG, "LooksLikeTypo = " + result.mLooksLikeTypo);
381                for (String suggestion : result.mSuggestions) {
382                    Log.i(TAG, suggestion);
383                }
384            }
385
386            final int flags =
387                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : 0)
388                            | (result.mLooksLikeTypo
389                                    ? SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO : 0);
390            return new SuggestionsInfo(flags, result.mSuggestions);
391        }
392    }
393}
394