WordInputEventForPersonalization.java revision 4beeb9253a06482299e0c67467531d30436a02fc
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin.utils; 18 19import android.util.Log; 20 21import com.android.inputmethod.annotations.UsedForTesting; 22import com.android.inputmethod.latin.NgramContext; 23import com.android.inputmethod.latin.common.Constants; 24import com.android.inputmethod.latin.common.StringUtils; 25import com.android.inputmethod.latin.settings.SpacingAndPunctuations; 26import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; 27 28import java.util.ArrayList; 29import java.util.List; 30import java.util.Locale; 31 32// Note: this class is used as a parameter type of a native method. You should be careful when you 33// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative(). 34public final class WordInputEventForPersonalization { 35 private static final String TAG = WordInputEventForPersonalization.class.getSimpleName(); 36 private static final boolean DEBUG_TOKEN = false; 37 38 public final int[] mTargetWord; 39 public final int mPrevWordsCount; 40 public final int[][] mPrevWordArray = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; 41 public final boolean[] mIsPrevWordBeginningOfSentenceArray = 42 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 43 public final boolean mIsValid; 44 // Time stamp in seconds. 45 public final int mTimestamp; 46 47 @UsedForTesting 48 public WordInputEventForPersonalization(final CharSequence targetWord, 49 final NgramContext ngramContext, final boolean isValid, final int timestamp) { 50 mTargetWord = StringUtils.toCodePointArray(targetWord); 51 mPrevWordsCount = ngramContext.getPrevWordCount(); 52 ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray); 53 mIsValid = isValid; 54 mTimestamp = timestamp; 55 } 56 57 // Process a list of words and return a list of {@link WordInputEventForPersonalization} 58 // objects. 59 public static ArrayList<WordInputEventForPersonalization> createInputEventFrom( 60 final List<String> tokens, final int timestamp, 61 final SpacingAndPunctuations spacingAndPunctuations, final Locale locale, 62 final DistracterFilter distracterFilter) { 63 final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>(); 64 final int N = tokens.size(); 65 NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; 66 for (int i = 0; i < N; ++i) { 67 final String tempWord = tokens.get(i); 68 if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) { 69 // just skip this token 70 if (DEBUG_TOKEN) { 71 Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\""); 72 } 73 continue; 74 } 75 if (!DictionaryInfoUtils.looksValidForDictionaryInsertion( 76 tempWord, spacingAndPunctuations)) { 77 if (DEBUG_TOKEN) { 78 Log.d(TAG, "--- not looksValidForDictionaryInsertion: \"" 79 + tempWord + "\""); 80 } 81 // Sentence terminator found. Split. 82 // TODO: Detect whether the context is beginning-of-sentence. 83 ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; 84 continue; 85 } 86 if (DEBUG_TOKEN) { 87 Log.d(TAG, "--- word: \"" + tempWord + "\""); 88 } 89 final WordInputEventForPersonalization inputEvent = 90 detectWhetherVaildWordOrNotAndGetInputEvent( 91 ngramContext, tempWord, timestamp, locale, distracterFilter); 92 if (inputEvent == null) { 93 continue; 94 } 95 inputEvents.add(inputEvent); 96 ngramContext = ngramContext.getNextNgramContext(new NgramContext.WordInfo(tempWord)); 97 } 98 return inputEvents; 99 } 100 101 private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent( 102 final NgramContext ngramContext, final String targetWord, final int timestamp, 103 final Locale locale, final DistracterFilter distracterFilter) { 104 if (locale == null) { 105 return null; 106 } 107 final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext, 108 targetWord, locale); 109 final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? 110 targetWord.toLowerCase(locale) : targetWord; 111 if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) { 112 // The word is a distracter. 113 return null; 114 } 115 return new WordInputEventForPersonalization(word, ngramContext, 116 !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp); 117 } 118} 119