AndroidSpellCheckerService.java revision b1f3c24c6326ad63b4fcad4014c20161984e40ef
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.SharedPreferences;
21import android.preference.PreferenceManager;
22import android.service.textservice.SpellCheckerService;
23import android.text.TextUtils;
24import android.util.Log;
25import android.util.LruCache;
26import android.view.textservice.SentenceSuggestionsInfo;
27import android.view.textservice.SuggestionsInfo;
28import android.view.textservice.TextInfo;
29
30import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
31import com.android.inputmethod.keyboard.ProximityInfo;
32import com.android.inputmethod.latin.BinaryDictionary;
33import com.android.inputmethod.latin.Dictionary;
34import com.android.inputmethod.latin.Dictionary.WordCallback;
35import com.android.inputmethod.latin.DictionaryCollection;
36import com.android.inputmethod.latin.DictionaryFactory;
37import com.android.inputmethod.latin.LatinIME;
38import com.android.inputmethod.latin.LocaleUtils;
39import com.android.inputmethod.latin.R;
40import com.android.inputmethod.latin.StringUtils;
41import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary;
42import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary;
43import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary;
44import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
45import com.android.inputmethod.latin.WhitelistDictionary;
46import com.android.inputmethod.latin.WordComposer;
47
48import java.lang.ref.WeakReference;
49import java.util.ArrayList;
50import java.util.Arrays;
51import java.util.Collections;
52import java.util.HashSet;
53import java.util.Iterator;
54import java.util.Locale;
55import java.util.Map;
56import java.util.TreeMap;
57
58/**
59 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
60 */
61public class AndroidSpellCheckerService extends SpellCheckerService
62        implements SharedPreferences.OnSharedPreferenceChangeListener {
63    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
64    private static final boolean DBG = false;
65    private static final int POOL_SIZE = 2;
66
67    public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts";
68
69    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
70    private static final int CAPITALIZE_FIRST = 1; // First only
71    private static final int CAPITALIZE_ALL = 2; // All caps
72
73    private final static String[] EMPTY_STRING_ARRAY = new String[0];
74    private Map<String, DictionaryPool> mDictionaryPools =
75            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
76    private Map<String, Dictionary> mUserDictionaries =
77            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
78    private Map<String, Dictionary> mWhitelistDictionaries =
79            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
80    private Dictionary mContactsDictionary;
81
82    // The threshold for a candidate to be offered as a suggestion.
83    private float mSuggestionThreshold;
84    // The threshold for a suggestion to be considered "recommended".
85    private float mRecommendedThreshold;
86    // Whether to use the contacts dictionary
87    private boolean mUseContactsDictionary;
88    private final Object mUseContactsLock = new Object();
89
90    private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
91            new HashSet<WeakReference<DictionaryCollection>>();
92
93    public static final int SCRIPT_LATIN = 0;
94    public static final int SCRIPT_CYRILLIC = 1;
95    private static final String SINGLE_QUOTE = "\u0027";
96    private static final String APOSTROPHE = "\u2019";
97    private static final TreeMap<String, Integer> mLanguageToScript;
98    static {
99        // List of the supported languages and their associated script. We won't check
100        // words written in another script than the selected script, because we know we
101        // don't have those in our dictionary so we will underline everything and we
102        // will never have any suggestions, so it makes no sense checking them, and this
103        // is done in {@link #shouldFilterOut}. Also, the script is used to choose which
104        // proximity to pass to the dictionary descent algorithm.
105        // IMPORTANT: this only contains languages - do not write countries in there.
106        // Only the language is searched from the map.
107        mLanguageToScript = new TreeMap<String, Integer>();
108        mLanguageToScript.put("en", SCRIPT_LATIN);
109        mLanguageToScript.put("fr", SCRIPT_LATIN);
110        mLanguageToScript.put("de", SCRIPT_LATIN);
111        mLanguageToScript.put("nl", SCRIPT_LATIN);
112        mLanguageToScript.put("cs", SCRIPT_LATIN);
113        mLanguageToScript.put("es", SCRIPT_LATIN);
114        mLanguageToScript.put("it", SCRIPT_LATIN);
115        mLanguageToScript.put("hr", SCRIPT_LATIN);
116        mLanguageToScript.put("pt", SCRIPT_LATIN);
117        mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
118        // TODO: Make a persian proximity, and activate the Farsi subtype.
119        // mLanguageToScript.put("fa", SCRIPT_PERSIAN);
120    }
121
122    @Override public void onCreate() {
123        super.onCreate();
124        mSuggestionThreshold =
125                Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value));
126        mRecommendedThreshold =
127                Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value));
128        final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
129        prefs.registerOnSharedPreferenceChangeListener(this);
130        onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
131    }
132
133    private static int getScriptFromLocale(final Locale locale) {
134        final Integer script = mLanguageToScript.get(locale.getLanguage());
135        if (null == script) {
136            throw new RuntimeException("We have been called with an unsupported language: \""
137                    + locale.getLanguage() + "\". Framework bug?");
138        }
139        return script;
140    }
141
142    @Override
143    public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
144        if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
145        synchronized(mUseContactsLock) {
146            mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true);
147            if (mUseContactsDictionary) {
148                startUsingContactsDictionaryLocked();
149            } else {
150                stopUsingContactsDictionaryLocked();
151            }
152        }
153    }
154
155    private void startUsingContactsDictionaryLocked() {
156        if (null == mContactsDictionary) {
157            mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
158        }
159        final Iterator<WeakReference<DictionaryCollection>> iterator =
160                mDictionaryCollectionsList.iterator();
161        while (iterator.hasNext()) {
162            final WeakReference<DictionaryCollection> dictRef = iterator.next();
163            final DictionaryCollection dict = dictRef.get();
164            if (null == dict) {
165                iterator.remove();
166            } else {
167                dict.addDictionary(mContactsDictionary);
168            }
169        }
170    }
171
172    private void stopUsingContactsDictionaryLocked() {
173        if (null == mContactsDictionary) return;
174        final Dictionary contactsDict = mContactsDictionary;
175        // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed
176        mContactsDictionary = null;
177        final Iterator<WeakReference<DictionaryCollection>> iterator =
178                mDictionaryCollectionsList.iterator();
179        while (iterator.hasNext()) {
180            final WeakReference<DictionaryCollection> dictRef = iterator.next();
181            final DictionaryCollection dict = dictRef.get();
182            if (null == dict) {
183                iterator.remove();
184            } else {
185                dict.removeDictionary(contactsDict);
186            }
187        }
188        contactsDict.close();
189    }
190
191    @Override
192    public Session createSession() {
193        return new AndroidSpellCheckerSession(this);
194    }
195
196    private static SuggestionsInfo getNotInDictEmptySuggestions() {
197        return new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
198    }
199
200    private static SuggestionsInfo getInDictEmptySuggestions() {
201        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
202                EMPTY_STRING_ARRAY);
203    }
204
205    private static class SuggestionsGatherer implements WordCallback {
206        public static class Result {
207            public final String[] mSuggestions;
208            public final boolean mHasRecommendedSuggestions;
209            public Result(final String[] gatheredSuggestions,
210                    final boolean hasRecommendedSuggestions) {
211                mSuggestions = gatheredSuggestions;
212                mHasRecommendedSuggestions = hasRecommendedSuggestions;
213            }
214        }
215
216        private final ArrayList<CharSequence> mSuggestions;
217        private final int[] mScores;
218        private final String mOriginalText;
219        private final float mSuggestionThreshold;
220        private final float mRecommendedThreshold;
221        private final int mMaxLength;
222        private int mLength = 0;
223
224        // The two following attributes are only ever filled if the requested max length
225        // is 0 (or less, which is treated the same).
226        private String mBestSuggestion = null;
227        private int mBestScore = Integer.MIN_VALUE; // As small as possible
228
229        SuggestionsGatherer(final String originalText, final float suggestionThreshold,
230                final float recommendedThreshold, final int maxLength) {
231            mOriginalText = originalText;
232            mSuggestionThreshold = suggestionThreshold;
233            mRecommendedThreshold = recommendedThreshold;
234            mMaxLength = maxLength;
235            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
236            mScores = new int[mMaxLength];
237        }
238
239        @Override
240        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
241                int dicTypeId, int dataType) {
242            final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score);
243            // binarySearch returns the index if the element exists, and -<insertion index> - 1
244            // if it doesn't. See documentation for binarySearch.
245            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
246
247            if (insertIndex == 0 && mLength >= mMaxLength) {
248                // In the future, we may want to keep track of the best suggestion score even if
249                // we are asked for 0 suggestions. In this case, we can use the following
250                // (tested) code to keep it:
251                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
252                // then we need to keep track of the best suggestion in mBestScore and
253                // mBestSuggestion. This is so that we know whether the best suggestion makes
254                // the score cutoff, since we need to know that to return a meaningful
255                // looksLikeTypo.
256                // if (0 >= mMaxLength) {
257                //     if (score > mBestScore) {
258                //         mBestScore = score;
259                //         mBestSuggestion = new String(word, wordOffset, wordLength);
260                //     }
261                // }
262                return true;
263            }
264            if (insertIndex >= mMaxLength) {
265                // We found a suggestion, but its score is too weak to be kept considering
266                // the suggestion limit.
267                return true;
268            }
269
270            // Compute the normalized score and skip this word if it's normalized score does not
271            // make the threshold.
272            final String wordString = new String(word, wordOffset, wordLength);
273            final float normalizedScore =
274                    BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
275            if (normalizedScore < mSuggestionThreshold) {
276                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
277                return true;
278            }
279
280            if (mLength < mMaxLength) {
281                final int copyLen = mLength - insertIndex;
282                ++mLength;
283                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
284                mSuggestions.add(insertIndex, wordString);
285            } else {
286                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
287                mSuggestions.add(insertIndex, wordString);
288                mSuggestions.remove(0);
289            }
290            mScores[insertIndex] = score;
291
292            return true;
293        }
294
295        public Result getResults(final int capitalizeType, final Locale locale) {
296            final String[] gatheredSuggestions;
297            final boolean hasRecommendedSuggestions;
298            if (0 == mLength) {
299                // Either we found no suggestions, or we found some BUT the max length was 0.
300                // If we found some mBestSuggestion will not be null. If it is null, then
301                // we found none, regardless of the max length.
302                if (null == mBestSuggestion) {
303                    gatheredSuggestions = null;
304                    hasRecommendedSuggestions = false;
305                } else {
306                    gatheredSuggestions = EMPTY_STRING_ARRAY;
307                    final float normalizedScore = BinaryDictionary.calcNormalizedScore(
308                            mOriginalText, mBestSuggestion, mBestScore);
309                    hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
310                }
311            } else {
312                if (DBG) {
313                    if (mLength != mSuggestions.size()) {
314                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
315                    }
316                    for (int i = mLength - 1; i >= 0; --i) {
317                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
318                    }
319                }
320                Collections.reverse(mSuggestions);
321                StringUtils.removeDupes(mSuggestions);
322                if (CAPITALIZE_ALL == capitalizeType) {
323                    for (int i = 0; i < mSuggestions.size(); ++i) {
324                        // get(i) returns a CharSequence which is actually a String so .toString()
325                        // should return the same object.
326                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
327                    }
328                } else if (CAPITALIZE_FIRST == capitalizeType) {
329                    for (int i = 0; i < mSuggestions.size(); ++i) {
330                        // Likewise
331                        mSuggestions.set(i, StringUtils.toTitleCase(
332                                mSuggestions.get(i).toString(), locale));
333                    }
334                }
335                // This returns a String[], while toArray() returns an Object[] which cannot be cast
336                // into a String[].
337                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
338
339                final int bestScore = mScores[mLength - 1];
340                final CharSequence bestSuggestion = mSuggestions.get(0);
341                final float normalizedScore =
342                        BinaryDictionary.calcNormalizedScore(
343                                mOriginalText, bestSuggestion.toString(), bestScore);
344                hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
345                if (DBG) {
346                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
347                    Log.i(TAG, "Normalized score = " + normalizedScore
348                            + " (threshold " + mRecommendedThreshold
349                            + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
350                }
351            }
352            return new Result(gatheredSuggestions, hasRecommendedSuggestions);
353        }
354    }
355
356    @Override
357    public boolean onUnbind(final Intent intent) {
358        closeAllDictionaries();
359        return false;
360    }
361
362    private void closeAllDictionaries() {
363        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
364        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
365        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
366        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
367        final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries;
368        mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
369        for (DictionaryPool pool : oldPools.values()) {
370            pool.close();
371        }
372        for (Dictionary dict : oldUserDictionaries.values()) {
373            dict.close();
374        }
375        for (Dictionary dict : oldWhitelistDictionaries.values()) {
376            dict.close();
377        }
378        synchronized (mUseContactsLock) {
379            if (null != mContactsDictionary) {
380                // The synchronously loaded contacts dictionary should have been in one
381                // or several pools, but it is shielded against multiple closing and it's
382                // safe to call it several times.
383                final Dictionary dictToClose = mContactsDictionary;
384                // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no
385                // longer needed
386                mContactsDictionary = null;
387                dictToClose.close();
388            }
389        }
390    }
391
392    private DictionaryPool getDictionaryPool(final String locale) {
393        DictionaryPool pool = mDictionaryPools.get(locale);
394        if (null == pool) {
395            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
396            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
397            mDictionaryPools.put(locale, pool);
398        }
399        return pool;
400    }
401
402    public DictAndProximity createDictAndProximity(final Locale locale) {
403        final int script = getScriptFromLocale(locale);
404        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
405                SpellCheckerProximityInfo.getProximityForScript(script),
406                SpellCheckerProximityInfo.ROW_SIZE,
407                SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH,
408                SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT);
409        final DictionaryCollection dictionaryCollection =
410                DictionaryFactory.createMainDictionaryFromManager(this, locale,
411                        true /* useFullEditDistance */);
412        final String localeStr = locale.toString();
413        Dictionary userDictionary = mUserDictionaries.get(localeStr);
414        if (null == userDictionary) {
415            if (LatinIME.USE_BINARY_USER_DICTIONARY) {
416                userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true);
417            } else {
418                userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
419            }
420            mUserDictionaries.put(localeStr, userDictionary);
421        }
422        dictionaryCollection.addDictionary(userDictionary);
423        Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr);
424        if (null == whitelistDictionary) {
425            whitelistDictionary = new WhitelistDictionary(this, locale);
426            mWhitelistDictionaries.put(localeStr, whitelistDictionary);
427        }
428        dictionaryCollection.addDictionary(whitelistDictionary);
429        synchronized (mUseContactsLock) {
430            if (mUseContactsDictionary) {
431                if (null == mContactsDictionary) {
432                    // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no
433                    // longer needed
434                    if (LatinIME.USE_BINARY_CONTACTS_DICTIONARY) {
435                        mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this);
436                    } else {
437                        mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
438                    }
439                }
440            }
441            dictionaryCollection.addDictionary(mContactsDictionary);
442            mDictionaryCollectionsList.add(
443                    new WeakReference<DictionaryCollection>(dictionaryCollection));
444        }
445        return new DictAndProximity(dictionaryCollection, proximityInfo);
446    }
447
448    // This method assumes the text is not empty or null.
449    private static int getCapitalizationType(String text) {
450        // If the first char is not uppercase, then the word is either all lower case,
451        // and in either case we return CAPITALIZE_NONE.
452        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
453        final int len = text.length();
454        int capsCount = 1;
455        for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
456            if (1 != capsCount && i != capsCount) break;
457            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
458        }
459        // We know the first char is upper case. So we want to test if either everything
460        // else is lower case, or if everything else is upper case. If the string is
461        // exactly one char long, then we will arrive here with capsCount 1, and this is
462        // correct, too.
463        if (1 == capsCount) return CAPITALIZE_FIRST;
464        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
465    }
466
467    private static class AndroidSpellCheckerSession extends Session {
468        // Immutable, but need the locale which is not available in the constructor yet
469        private DictionaryPool mDictionaryPool;
470        // Likewise
471        private Locale mLocale;
472        // Cache this for performance
473        private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
474
475        private final AndroidSpellCheckerService mService;
476
477        private final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
478
479        private static class SuggestionsParams {
480            public final String[] mSuggestions;
481            public final int mFlags;
482            public SuggestionsParams(String[] suggestions, int flags) {
483                mSuggestions = suggestions;
484                mFlags = flags;
485            }
486        }
487
488        private static class SuggestionsCache {
489            private static final int MAX_CACHE_SIZE = 50;
490            // TODO: support bigram
491            private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
492                    new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE);
493
494            public SuggestionsParams getSuggestionsFromCache(String query) {
495                return mUnigramSuggestionsInfoCache.get(query);
496            }
497
498            public void putSuggestionsToCache(String query, String[] suggestions, int flags) {
499                if (suggestions == null || TextUtils.isEmpty(query)) {
500                    return;
501                }
502                mUnigramSuggestionsInfoCache.put(query, new SuggestionsParams(suggestions, flags));
503            }
504        }
505
506        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
507            mService = service;
508        }
509
510        @Override
511        public void onCreate() {
512            final String localeString = getLocale();
513            mDictionaryPool = mService.getDictionaryPool(localeString);
514            mLocale = LocaleUtils.constructLocaleFromString(localeString);
515            mScript = getScriptFromLocale(mLocale);
516        }
517
518        /*
519         * Returns whether the code point is a letter that makes sense for the specified
520         * locale for this spell checker.
521         * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
522         * and is limited to EFIGS languages and Russian.
523         * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters
524         * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters.
525         */
526        private static boolean isLetterCheckableByLanguage(final int codePoint,
527                final int script) {
528            switch (script) {
529            case SCRIPT_LATIN:
530                // Our supported latin script dictionaries (EFIGS) at the moment only include
531                // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
532                // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF,
533                // so the below is a very efficient way to test for it. As for the 0-0x3F, it's
534                // excluded from isLetter anyway.
535                return codePoint <= 0x2AF && Character.isLetter(codePoint);
536            case SCRIPT_CYRILLIC:
537                // All Cyrillic characters are in the 400~52F block. There are some in the upper
538                // Unicode range, but they are archaic characters that are not used in modern
539                // russian and are not used by our dictionary.
540                return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint);
541            default:
542                // Should never come here
543                throw new RuntimeException("Impossible value of script: " + script);
544            }
545        }
546
547        /**
548         * Finds out whether a particular string should be filtered out of spell checking.
549         *
550         * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
551         * we know we will never recognize, this accepts a script identifier that should be one
552         * of the SCRIPT_* constants defined above, to rule out quickly characters from very
553         * different languages.
554         *
555         * @param text the string to evaluate.
556         * @param script the identifier for the script this spell checker recognizes
557         * @return true if we should filter this text out, false otherwise
558         */
559        private static boolean shouldFilterOut(final String text, final int script) {
560            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
561
562            // TODO: check if an equivalent processing can't be done more quickly with a
563            // compiled regexp.
564            // Filter by first letter
565            final int firstCodePoint = text.codePointAt(0);
566            // Filter out words that don't start with a letter or an apostrophe
567            if (!isLetterCheckableByLanguage(firstCodePoint, script)
568                    && '\'' != firstCodePoint) return true;
569
570            // Filter contents
571            final int length = text.length();
572            int letterCount = 0;
573            for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
574                final int codePoint = text.codePointAt(i);
575                // Any word containing a '@' is probably an e-mail address
576                // Any word containing a '/' is probably either an ad-hoc combination of two
577                // words or a URI - in either case we don't want to spell check that
578                if ('@' == codePoint || '/' == codePoint) return true;
579                if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount;
580            }
581            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
582            // in this word are letters
583            return (letterCount * 4 < length * 3);
584        }
585
586        private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote(
587                TextInfo ti, SentenceSuggestionsInfo ssi) {
588            final String typedText = ti.getText();
589            if (!typedText.contains(SINGLE_QUOTE)) {
590                return null;
591            }
592            final int N = ssi.getSuggestionsCount();
593            final ArrayList<Integer> additionalOffsets = new ArrayList<Integer>();
594            final ArrayList<Integer> additionalLengths = new ArrayList<Integer>();
595            final ArrayList<SuggestionsInfo> additionalSuggestionsInfos =
596                    new ArrayList<SuggestionsInfo>();
597            for (int i = 0; i < N; ++i) {
598                final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i);
599                final int flags = si.getSuggestionsAttributes();
600                if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) {
601                    continue;
602                }
603                final int offset = ssi.getOffsetAt(i);
604                final int length = ssi.getLengthAt(i);
605                final String subText = typedText.substring(offset, offset + length);
606                if (!subText.contains(SINGLE_QUOTE)) {
607                    continue;
608                }
609                final String[] splitTexts = subText.split(SINGLE_QUOTE, -1);
610                if (splitTexts == null || splitTexts.length <= 1) {
611                    continue;
612                }
613                final int splitNum = splitTexts.length;
614                for (int j = 0; j < splitNum; ++j) {
615                    final String splitText = splitTexts[j];
616                    if (TextUtils.isEmpty(splitText)) {
617                        continue;
618                    }
619                    if (mSuggestionsCache.getSuggestionsFromCache(splitText) == null) {
620                        continue;
621                    }
622                    final int newLength = splitText.length();
623                    // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO
624                    final int newFlags = 0;
625                    final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY);
626                    newSi.setCookieAndSequence(si.getCookie(), si.getSequence());
627                    if (DBG) {
628                        Log.d(TAG, "Override and remove old span over: "
629                                + splitText + ", " + offset + "," + newLength);
630                    }
631                    additionalOffsets.add(offset);
632                    additionalLengths.add(newLength);
633                    additionalSuggestionsInfos.add(newSi);
634                }
635            }
636            final int additionalSize = additionalOffsets.size();
637            if (additionalSize <= 0) {
638                return null;
639            }
640            final int suggestionsSize = N + additionalSize;
641            final int[] newOffsets = new int[suggestionsSize];
642            final int[] newLengths = new int[suggestionsSize];
643            final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize];
644            int i;
645            for (i = 0; i < N; ++i) {
646                newOffsets[i] = ssi.getOffsetAt(i);
647                newLengths[i] = ssi.getLengthAt(i);
648                newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i);
649            }
650            for (; i < suggestionsSize; ++i) {
651                newOffsets[i] = additionalOffsets.get(i - N);
652                newLengths[i] = additionalLengths.get(i - N);
653                newSuggestionsInfos[i] = additionalSuggestionsInfos.get(i - N);
654            }
655            return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths);
656        }
657
658        @Override
659        public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple(
660                TextInfo[] textInfos, int suggestionsLimit) {
661            final SentenceSuggestionsInfo[] retval = super.onGetSentenceSuggestionsMultiple(
662                    textInfos, suggestionsLimit);
663            if (retval == null || retval.length != textInfos.length) {
664                return retval;
665            }
666            for (int i = 0; i < retval.length; ++i) {
667                final SentenceSuggestionsInfo tempSsi =
668                        fixWronglyInvalidatedWordWithSingleQuote(textInfos[i], retval[i]);
669                if (tempSsi != null) {
670                    retval[i] = tempSsi;
671                }
672            }
673            return retval;
674        }
675
676        @Override
677        public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos,
678                int suggestionsLimit, boolean sequentialWords) {
679            final int length = textInfos.length;
680            final SuggestionsInfo[] retval = new SuggestionsInfo[length];
681            for (int i = 0; i < length; ++i) {
682                final String prevWord;
683                if (sequentialWords && i > 0) {
684                    final String prevWordCandidate = textInfos[i - 1].getText();
685                    // Note that an empty string would be used to indicate the initial word
686                    // in the future.
687                    prevWord = TextUtils.isEmpty(prevWordCandidate) ? null : prevWordCandidate;
688                } else {
689                    prevWord = null;
690                }
691                retval[i] = onGetSuggestions(textInfos[i], prevWord, suggestionsLimit);
692                retval[i].setCookieAndSequence(
693                        textInfos[i].getCookie(), textInfos[i].getSequence());
694            }
695            return retval;
696        }
697
698        // Note : this must be reentrant
699        /**
700         * Gets a list of suggestions for a specific string. This returns a list of possible
701         * corrections for the text passed as an argument. It may split or group words, and
702         * even perform grammatical analysis.
703         */
704        @Override
705        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
706                final int suggestionsLimit) {
707            return onGetSuggestions(textInfo, null, suggestionsLimit);
708        }
709
710        private SuggestionsInfo onGetSuggestions(
711                final TextInfo textInfo, final String prevWord, final int suggestionsLimit) {
712            try {
713                final String inText = textInfo.getText();
714                final SuggestionsParams cachedSuggestionsParams =
715                        mSuggestionsCache.getSuggestionsFromCache(inText);
716                if (cachedSuggestionsParams != null) {
717                    if (DBG) {
718                        Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
719                    }
720                    return new SuggestionsInfo(
721                            cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
722                }
723
724                if (shouldFilterOut(inText, mScript)) {
725                    DictAndProximity dictInfo = null;
726                    try {
727                        dictInfo = mDictionaryPool.takeOrGetNull();
728                        if (null == dictInfo) return getNotInDictEmptySuggestions();
729                        return dictInfo.mDictionary.isValidWord(inText) ?
730                                getInDictEmptySuggestions() : getNotInDictEmptySuggestions();
731                    } finally {
732                        if (null != dictInfo) {
733                            if (!mDictionaryPool.offer(dictInfo)) {
734                                Log.e(TAG, "Can't re-insert a dictionary into its pool");
735                            }
736                        }
737                    }
738                }
739                final String text = inText.replaceAll(APOSTROPHE, SINGLE_QUOTE);
740
741                // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
742                final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
743                        mService.mSuggestionThreshold, mService.mRecommendedThreshold,
744                        suggestionsLimit);
745                final WordComposer composer = new WordComposer();
746                final int length = text.length();
747                for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
748                    final int codePoint = text.codePointAt(i);
749                    // The getXYForCodePointAndScript method returns (Y << 16) + X
750                    final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript(
751                            codePoint, mScript);
752                    if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) {
753                        composer.add(codePoint, WordComposer.NOT_A_COORDINATE,
754                                WordComposer.NOT_A_COORDINATE, null);
755                    } else {
756                        composer.add(codePoint, xy & 0xFFFF, xy >> 16, null);
757                    }
758                }
759
760                final int capitalizeType = getCapitalizationType(text);
761                boolean isInDict = true;
762                DictAndProximity dictInfo = null;
763                try {
764                    dictInfo = mDictionaryPool.takeOrGetNull();
765                    if (null == dictInfo) return getNotInDictEmptySuggestions();
766                    dictInfo.mDictionary.getWords(composer, prevWord, suggestionsGatherer,
767                            dictInfo.mProximityInfo);
768                    isInDict = dictInfo.mDictionary.isValidWord(text);
769                    if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
770                        // We want to test the word again if it's all caps or first caps only.
771                        // If it's fully down, we already tested it, if it's mixed case, we don't
772                        // want to test a lowercase version of it.
773                        isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
774                    }
775                } finally {
776                    if (null != dictInfo) {
777                        if (!mDictionaryPool.offer(dictInfo)) {
778                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
779                        }
780                    }
781                }
782
783                final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
784                        capitalizeType, mLocale);
785
786                if (DBG) {
787                    Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
788                            + suggestionsLimit);
789                    Log.i(TAG, "IsInDict = " + isInDict);
790                    Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
791                    Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
792                    if (null != result.mSuggestions) {
793                        for (String suggestion : result.mSuggestions) {
794                            Log.i(TAG, suggestion);
795                        }
796                    }
797                }
798
799                final int flags =
800                        (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
801                                : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
802                        | (result.mHasRecommendedSuggestions
803                                ? SuggestionsInfoCompatUtils
804                                        .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
805                                : 0);
806                final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
807                mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
808                return retval;
809            } catch (RuntimeException e) {
810                // Don't kill the keyboard if there is a bug in the spell checker
811                if (DBG) {
812                    throw e;
813                } else {
814                    Log.e(TAG, "Exception while spellcheking: " + e);
815                    return getNotInDictEmptySuggestions();
816                }
817            }
818        }
819    }
820}
821