WordInputEventForPersonalization.java revision 4beeb9253a06482299e0c67467531d30436a02fc
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.utils;
18
19import android.util.Log;
20
21import com.android.inputmethod.annotations.UsedForTesting;
22import com.android.inputmethod.latin.NgramContext;
23import com.android.inputmethod.latin.common.Constants;
24import com.android.inputmethod.latin.common.StringUtils;
25import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
26import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
27
28import java.util.ArrayList;
29import java.util.List;
30import java.util.Locale;
31
32// Note: this class is used as a parameter type of a native method. You should be careful when you
33// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative().
34public final class WordInputEventForPersonalization {
35    private static final String TAG = WordInputEventForPersonalization.class.getSimpleName();
36    private static final boolean DEBUG_TOKEN = false;
37
38    public final int[] mTargetWord;
39    public final int mPrevWordsCount;
40    public final int[][] mPrevWordArray = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
41    public final boolean[] mIsPrevWordBeginningOfSentenceArray =
42            new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
43    public final boolean mIsValid;
44    // Time stamp in seconds.
45    public final int mTimestamp;
46
47    @UsedForTesting
48    public WordInputEventForPersonalization(final CharSequence targetWord,
49            final NgramContext ngramContext, final boolean isValid, final int timestamp) {
50        mTargetWord = StringUtils.toCodePointArray(targetWord);
51        mPrevWordsCount = ngramContext.getPrevWordCount();
52        ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray);
53        mIsValid = isValid;
54        mTimestamp = timestamp;
55    }
56
57    // Process a list of words and return a list of {@link WordInputEventForPersonalization}
58    // objects.
59    public static ArrayList<WordInputEventForPersonalization> createInputEventFrom(
60            final List<String> tokens, final int timestamp,
61            final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
62            final DistracterFilter distracterFilter) {
63        final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>();
64        final int N = tokens.size();
65        NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
66        for (int i = 0; i < N; ++i) {
67            final String tempWord = tokens.get(i);
68            if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
69                // just skip this token
70                if (DEBUG_TOKEN) {
71                    Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\"");
72                }
73                continue;
74            }
75            if (!DictionaryInfoUtils.looksValidForDictionaryInsertion(
76                    tempWord, spacingAndPunctuations)) {
77                if (DEBUG_TOKEN) {
78                    Log.d(TAG, "--- not looksValidForDictionaryInsertion: \""
79                            + tempWord + "\"");
80                }
81                // Sentence terminator found. Split.
82                // TODO: Detect whether the context is beginning-of-sentence.
83                ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
84                continue;
85            }
86            if (DEBUG_TOKEN) {
87                Log.d(TAG, "--- word: \"" + tempWord + "\"");
88            }
89            final WordInputEventForPersonalization inputEvent =
90                    detectWhetherVaildWordOrNotAndGetInputEvent(
91                            ngramContext, tempWord, timestamp, locale, distracterFilter);
92            if (inputEvent == null) {
93                continue;
94            }
95            inputEvents.add(inputEvent);
96            ngramContext = ngramContext.getNextNgramContext(new NgramContext.WordInfo(tempWord));
97        }
98        return inputEvents;
99    }
100
101    private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent(
102            final NgramContext ngramContext, final String targetWord, final int timestamp,
103            final Locale locale, final DistracterFilter distracterFilter) {
104        if (locale == null) {
105            return null;
106        }
107        final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext,
108                targetWord, locale);
109        final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
110                targetWord.toLowerCase(locale) : targetWord;
111        if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) {
112            // The word is a distracter.
113            return null;
114        }
115        return new WordInputEventForPersonalization(word, ngramContext,
116                !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp);
117    }
118}
119