1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.res.Resources;
21import android.service.textservice.SpellCheckerService;
22import android.text.TextUtils;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25import android.view.textservice.TextInfo;
26
27import com.android.inputmethod.compat.ArraysCompatUtils;
28import com.android.inputmethod.keyboard.ProximityInfo;
29import com.android.inputmethod.latin.BinaryDictionary;
30import com.android.inputmethod.latin.Dictionary;
31import com.android.inputmethod.latin.Dictionary.DataType;
32import com.android.inputmethod.latin.Dictionary.WordCallback;
33import com.android.inputmethod.latin.DictionaryCollection;
34import com.android.inputmethod.latin.DictionaryFactory;
35import com.android.inputmethod.latin.Flag;
36import com.android.inputmethod.latin.LocaleUtils;
37import com.android.inputmethod.latin.R;
38import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary;
39import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
40import com.android.inputmethod.latin.Utils;
41import com.android.inputmethod.latin.WhitelistDictionary;
42import com.android.inputmethod.latin.WordComposer;
43
44import java.util.ArrayList;
45import java.util.Arrays;
46import java.util.Collections;
47import java.util.Locale;
48import java.util.Map;
49import java.util.TreeMap;
50
51/**
52 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
53 */
54public class AndroidSpellCheckerService extends SpellCheckerService {
55    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
56    private static final boolean DBG = false;
57    private static final int POOL_SIZE = 2;
58
59    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
60    private static final int CAPITALIZE_FIRST = 1; // First only
61    private static final int CAPITALIZE_ALL = 2; // All caps
62
63    private final static String[] EMPTY_STRING_ARRAY = new String[0];
64    private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY;
65    static {
66        // See BinaryDictionary.java for an explanation of these flags
67        // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the
68        // current dictionary configuration - for example, consider the UMLAUT flag
69        // so that it will be turned on for German dictionaries and off for others.
70        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS,
71                BinaryDictionary.ALL_CONFIG_FLAGS.length + 1);
72        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] =
73                BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE;
74    }
75    private Map<String, DictionaryPool> mDictionaryPools =
76            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
77    private Map<String, Dictionary> mUserDictionaries =
78            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
79    private Map<String, Dictionary> mWhitelistDictionaries =
80            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
81    private SynchronouslyLoadedContactsDictionary mContactsDictionary;
82
83    // The threshold for a candidate to be offered as a suggestion.
84    private double mSuggestionThreshold;
85    // The threshold for a suggestion to be considered "likely".
86    private double mLikelyThreshold;
87
88    @Override public void onCreate() {
89        super.onCreate();
90        mSuggestionThreshold =
91                Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
92        mLikelyThreshold =
93                Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
94    }
95
96    @Override
97    public Session createSession() {
98        return new AndroidSpellCheckerSession(this);
99    }
100
101    private static SuggestionsInfo getNotInDictEmptySuggestions() {
102        return new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
103    }
104
105    private static SuggestionsInfo getInDictEmptySuggestions() {
106        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
107                EMPTY_STRING_ARRAY);
108    }
109
110    private static class SuggestionsGatherer implements WordCallback {
111        public static class Result {
112            public final String[] mSuggestions;
113            public final boolean mHasLikelySuggestions;
114            public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
115                mSuggestions = gatheredSuggestions;
116                mHasLikelySuggestions = hasLikelySuggestions;
117            }
118        }
119
120        private final ArrayList<CharSequence> mSuggestions;
121        private final int[] mScores;
122        private final String mOriginalText;
123        private final double mSuggestionThreshold;
124        private final double mLikelyThreshold;
125        private final int mMaxLength;
126        private int mLength = 0;
127
128        // The two following attributes are only ever filled if the requested max length
129        // is 0 (or less, which is treated the same).
130        private String mBestSuggestion = null;
131        private int mBestScore = Integer.MIN_VALUE; // As small as possible
132
133        SuggestionsGatherer(final String originalText, final double suggestionThreshold,
134                final double likelyThreshold, final int maxLength) {
135            mOriginalText = originalText;
136            mSuggestionThreshold = suggestionThreshold;
137            mLikelyThreshold = likelyThreshold;
138            mMaxLength = maxLength;
139            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
140            mScores = new int[mMaxLength];
141        }
142
143        @Override
144        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
145                int dicTypeId, DataType dataType) {
146            final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
147            // binarySearch returns the index if the element exists, and -<insertion index> - 1
148            // if it doesn't. See documentation for binarySearch.
149            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
150
151            if (insertIndex == 0 && mLength >= mMaxLength) {
152                // In the future, we may want to keep track of the best suggestion score even if
153                // we are asked for 0 suggestions. In this case, we can use the following
154                // (tested) code to keep it:
155                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
156                // then we need to keep track of the best suggestion in mBestScore and
157                // mBestSuggestion. This is so that we know whether the best suggestion makes
158                // the score cutoff, since we need to know that to return a meaningful
159                // looksLikeTypo.
160                // if (0 >= mMaxLength) {
161                //     if (score > mBestScore) {
162                //         mBestScore = score;
163                //         mBestSuggestion = new String(word, wordOffset, wordLength);
164                //     }
165                // }
166                return true;
167            }
168            if (insertIndex >= mMaxLength) {
169                // We found a suggestion, but its score is too weak to be kept considering
170                // the suggestion limit.
171                return true;
172            }
173
174            // Compute the normalized score and skip this word if it's normalized score does not
175            // make the threshold.
176            final String wordString = new String(word, wordOffset, wordLength);
177            final double normalizedScore =
178                    Utils.calcNormalizedScore(mOriginalText, wordString, score);
179            if (normalizedScore < mSuggestionThreshold) {
180                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
181                return true;
182            }
183
184            if (mLength < mMaxLength) {
185                final int copyLen = mLength - insertIndex;
186                ++mLength;
187                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
188                mSuggestions.add(insertIndex, wordString);
189            } else {
190                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
191                mSuggestions.add(insertIndex, wordString);
192                mSuggestions.remove(0);
193            }
194            mScores[insertIndex] = score;
195
196            return true;
197        }
198
199        public Result getResults(final int capitalizeType, final Locale locale) {
200            final String[] gatheredSuggestions;
201            final boolean hasLikelySuggestions;
202            if (0 == mLength) {
203                // Either we found no suggestions, or we found some BUT the max length was 0.
204                // If we found some mBestSuggestion will not be null. If it is null, then
205                // we found none, regardless of the max length.
206                if (null == mBestSuggestion) {
207                    gatheredSuggestions = null;
208                    hasLikelySuggestions = false;
209                } else {
210                    gatheredSuggestions = EMPTY_STRING_ARRAY;
211                    final double normalizedScore =
212                            Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
213                    hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
214                }
215            } else {
216                if (DBG) {
217                    if (mLength != mSuggestions.size()) {
218                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
219                    }
220                    for (int i = mLength - 1; i >= 0; --i) {
221                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
222                    }
223                }
224                Collections.reverse(mSuggestions);
225                Utils.removeDupes(mSuggestions);
226                if (CAPITALIZE_ALL == capitalizeType) {
227                    for (int i = 0; i < mSuggestions.size(); ++i) {
228                        // get(i) returns a CharSequence which is actually a String so .toString()
229                        // should return the same object.
230                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
231                    }
232                } else if (CAPITALIZE_FIRST == capitalizeType) {
233                    for (int i = 0; i < mSuggestions.size(); ++i) {
234                        // Likewise
235                        mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
236                                locale));
237                    }
238                }
239                // This returns a String[], while toArray() returns an Object[] which cannot be cast
240                // into a String[].
241                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
242
243                final int bestScore = mScores[mLength - 1];
244                final CharSequence bestSuggestion = mSuggestions.get(0);
245                final double normalizedScore =
246                        Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
247                hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
248                if (DBG) {
249                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
250                    Log.i(TAG, "Normalized score = " + normalizedScore
251                            + " (threshold " + mLikelyThreshold
252                            + ") => hasLikelySuggestions = " + hasLikelySuggestions);
253                }
254            }
255            return new Result(gatheredSuggestions, hasLikelySuggestions);
256        }
257    }
258
259    @Override
260    public boolean onUnbind(final Intent intent) {
261        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
262        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
263        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
264        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
265        final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries;
266        mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
267        for (DictionaryPool pool : oldPools.values()) {
268            pool.close();
269        }
270        for (Dictionary dict : oldUserDictionaries.values()) {
271            dict.close();
272        }
273        for (Dictionary dict : oldWhitelistDictionaries.values()) {
274            dict.close();
275        }
276        if (null != mContactsDictionary) {
277            // The synchronously loaded contacts dictionary should have been in one
278            // or several pools, but it is shielded against multiple closing and it's
279            // safe to call it several times.
280            final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary;
281            mContactsDictionary = null;
282            dictToClose.close();
283        }
284        return false;
285    }
286
287    private DictionaryPool getDictionaryPool(final String locale) {
288        DictionaryPool pool = mDictionaryPools.get(locale);
289        if (null == pool) {
290            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
291            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
292            mDictionaryPools.put(locale, pool);
293        }
294        return pool;
295    }
296
297    public DictAndProximity createDictAndProximity(final Locale locale) {
298        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
299        final Resources resources = getResources();
300        final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
301        final DictionaryCollection dictionaryCollection =
302                DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId,
303                        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY);
304        final String localeStr = locale.toString();
305        Dictionary userDictionary = mUserDictionaries.get(localeStr);
306        if (null == userDictionary) {
307            userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
308            mUserDictionaries.put(localeStr, userDictionary);
309        }
310        dictionaryCollection.addDictionary(userDictionary);
311        Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr);
312        if (null == whitelistDictionary) {
313            whitelistDictionary = new WhitelistDictionary(this, locale);
314            mWhitelistDictionaries.put(localeStr, whitelistDictionary);
315        }
316        dictionaryCollection.addDictionary(whitelistDictionary);
317        if (null == mContactsDictionary) {
318            mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
319        }
320        // TODO: add a setting to use or not contacts when checking spelling
321        dictionaryCollection.addDictionary(mContactsDictionary);
322        return new DictAndProximity(dictionaryCollection, proximityInfo);
323    }
324
325    // This method assumes the text is not empty or null.
326    private static int getCapitalizationType(String text) {
327        // If the first char is not uppercase, then the word is either all lower case,
328        // and in either case we return CAPITALIZE_NONE.
329        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
330        final int len = text.codePointCount(0, text.length());
331        int capsCount = 1;
332        for (int i = 1; i < len; ++i) {
333            if (1 != capsCount && i != capsCount) break;
334            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
335        }
336        // We know the first char is upper case. So we want to test if either everything
337        // else is lower case, or if everything else is upper case. If the string is
338        // exactly one char long, then we will arrive here with capsCount 1, and this is
339        // correct, too.
340        if (1 == capsCount) return CAPITALIZE_FIRST;
341        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
342    }
343
344    private static class AndroidSpellCheckerSession extends Session {
345        // Immutable, but need the locale which is not available in the constructor yet
346        private DictionaryPool mDictionaryPool;
347        // Likewise
348        private Locale mLocale;
349
350        private final AndroidSpellCheckerService mService;
351
352        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
353            mService = service;
354        }
355
356        @Override
357        public void onCreate() {
358            final String localeString = getLocale();
359            mDictionaryPool = mService.getDictionaryPool(localeString);
360            mLocale = LocaleUtils.constructLocaleFromString(localeString);
361        }
362
363        /**
364         * Finds out whether a particular string should be filtered out of spell checking.
365         *
366         * This will loosely match URLs, numbers, symbols.
367         *
368         * @param text the string to evaluate.
369         * @return true if we should filter this text out, false otherwise
370         */
371        private boolean shouldFilterOut(final String text) {
372            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
373
374            // TODO: check if an equivalent processing can't be done more quickly with a
375            // compiled regexp.
376            // Filter by first letter
377            final int firstCodePoint = text.codePointAt(0);
378            // Filter out words that don't start with a letter or an apostrophe
379            if (!Character.isLetter(firstCodePoint)
380                    && '\'' != firstCodePoint) return true;
381
382            // Filter contents
383            final int length = text.length();
384            int letterCount = 0;
385            for (int i = 0; i < length; ++i) {
386                final int codePoint = text.codePointAt(i);
387                // Any word containing a '@' is probably an e-mail address
388                // Any word containing a '/' is probably either an ad-hoc combination of two
389                // words or a URI - in either case we don't want to spell check that
390                if ('@' == codePoint
391                        || '/' == codePoint) return true;
392                if (Character.isLetter(codePoint)) ++letterCount;
393            }
394            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
395            // in this word are letters
396            return (letterCount * 4 < length * 3);
397        }
398
399        // Note : this must be reentrant
400        /**
401         * Gets a list of suggestions for a specific string. This returns a list of possible
402         * corrections for the text passed as an argument. It may split or group words, and
403         * even perform grammatical analysis.
404         */
405        @Override
406        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
407                final int suggestionsLimit) {
408            try {
409                final String text = textInfo.getText();
410
411                if (shouldFilterOut(text)) {
412                    DictAndProximity dictInfo = null;
413                    try {
414                        dictInfo = mDictionaryPool.takeOrGetNull();
415                        if (null == dictInfo) return getNotInDictEmptySuggestions();
416                        return dictInfo.mDictionary.isValidWord(text) ? getInDictEmptySuggestions()
417                                : getNotInDictEmptySuggestions();
418                    } finally {
419                        if (null != dictInfo) {
420                            if (!mDictionaryPool.offer(dictInfo)) {
421                                Log.e(TAG, "Can't re-insert a dictionary into its pool");
422                            }
423                        }
424                    }
425                }
426
427                // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
428                final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
429                        mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
430                final WordComposer composer = new WordComposer();
431                final int length = text.length();
432                for (int i = 0; i < length; ++i) {
433                    final int character = text.codePointAt(i);
434                    final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
435                    final int[] proximities;
436                    if (-1 == proximityIndex) {
437                        proximities = new int[] { character };
438                    } else {
439                        proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
440                                proximityIndex,
441                                proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
442                    }
443                    composer.add(character, proximities,
444                            WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
445                }
446
447                final int capitalizeType = getCapitalizationType(text);
448                boolean isInDict = true;
449                DictAndProximity dictInfo = null;
450                try {
451                    dictInfo = mDictionaryPool.takeOrGetNull();
452                    if (null == dictInfo) return getNotInDictEmptySuggestions();
453                    dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
454                            dictInfo.mProximityInfo);
455                    isInDict = dictInfo.mDictionary.isValidWord(text);
456                    if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
457                        // We want to test the word again if it's all caps or first caps only.
458                        // If it's fully down, we already tested it, if it's mixed case, we don't
459                        // want to test a lowercase version of it.
460                        isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
461                    }
462                } finally {
463                    if (null != dictInfo) {
464                        if (!mDictionaryPool.offer(dictInfo)) {
465                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
466                        }
467                    }
468                }
469
470                final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
471                        capitalizeType, mLocale);
472
473                if (DBG) {
474                    Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
475                            + suggestionsLimit);
476                    Log.i(TAG, "IsInDict = " + isInDict);
477                    Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
478                    Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
479                    if (null != result.mSuggestions) {
480                        for (String suggestion : result.mSuggestions) {
481                            Log.i(TAG, suggestion);
482                        }
483                    }
484                }
485
486                // TODO: actually use result.mHasLikelySuggestions
487                final int flags =
488                        (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
489                                : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
490                return new SuggestionsInfo(flags, result.mSuggestions);
491            } catch (RuntimeException e) {
492                // Don't kill the keyboard if there is a bug in the spell checker
493                if (DBG) {
494                    throw e;
495                } else {
496                    Log.e(TAG, "Exception while spellcheking: " + e);
497                    return getNotInDictEmptySuggestions();
498                }
499            }
500        }
501    }
502}
503