WordInputEventForPersonalization.java revision 16cc3992d7468ef781753df7b4227330e0834501
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.utils;

import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * A single word input event: a target word together with the previous-word context it was
 * seen in, a validity flag and a timestamp.
 *
 * Note: this class is used as a parameter type of a native method. Be careful when renaming
 * this class or any of its fields. See BinaryDictionary#addMultipleDictionaryEntriesNative().
 */
public final class WordInputEventForPersonalization {
    private static final String TAG = WordInputEventForPersonalization.class.getSimpleName();
    private static final boolean DEBUG_TOKEN = false;

    // The target word, as produced by StringUtils#toCodePointArray.
    public final int[] mTargetWord;
    // Value of NgramContext#getPrevWordCount() for the context this event was built with.
    public final int mPrevWordsCount;
    // Filled in by NgramContext#outputToArray together with the array below.
    public final int[][] mPrevWordArray = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
    public final boolean[] mIsPrevWordBeginningOfSentenceArray =
            new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    public final boolean mIsValid;
    // Time stamp in seconds.
    public final int mTimestamp;

    /**
     * Builds an event from a word and the n-gram context that precedes it.
     *
     * @param targetWord the word this event is about
     * @param ngramContext the context whose previous words are copied into this event
     * @param isValid stored as-is in {@link #mIsValid}
     * @param timestamp stored as-is in {@link #mTimestamp}
     */
    @UsedForTesting
    public WordInputEventForPersonalization(final CharSequence targetWord,
            final NgramContext ngramContext, final boolean isValid, final int timestamp) {
        mTargetWord = StringUtils.toCodePointArray(targetWord);
        mPrevWordsCount = ngramContext.getPrevWordCount();
        ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray);
        mIsValid = isValid;
        mTimestamp = timestamp;
    }

    /**
     * Processes a list of words and returns a list of {@link WordInputEventForPersonalization}
     * objects.
     *
     * Tokens are visited in order. Empty or whitespace-only tokens are ignored. A token that
     * does not look valid for dictionary insertion produces no event and resets the running
     * n-gram context. For any other token an event is requested; when one is produced it is
     * appended to the result and the context is advanced with that token, otherwise the token
     * is dropped and the context is left unchanged.
     *
     * @param tokens the words to process
     * @param timestamp the timestamp given to every created event
     * @param spacingAndPunctuations passed to the dictionary-insertion validity check
     * @param locale the locale used for distracter filtering and lower-casing
     * @param distracterFilter the filter consulted for each candidate word
     * @return the created events, in token order; possibly empty
     */
    public static ArrayList<WordInputEventForPersonalization> createInputEventFrom(
            final List<String> tokens, final int timestamp,
            final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
            final DistracterFilter distracterFilter) {
        final ArrayList<WordInputEventForPersonalization> events = new ArrayList<>();
        NgramContext context = NgramContext.EMPTY_PREV_WORDS_INFO;
        for (final String token : tokens) {
            if (StringUtils.isEmptyStringOrWhiteSpaces(token)) {
                // Nothing to learn from this token; the context is kept as it is.
                if (DEBUG_TOKEN) {
                    Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + token + "\"");
                }
                continue;
            }
            final boolean insertable =
                    DictionaryInfoUtils.looksValidForDictionaryInsertion(
                            token, spacingAndPunctuations);
            if (!insertable) {
                if (DEBUG_TOKEN) {
                    Log.d(TAG, "--- not looksValidForDictionaryInsertion: \""
                            + token + "\"");
                }
                // Treated as a sentence terminator: start over with an empty context.
                // TODO: Detect whether the context is beginning-of-sentence.
                context = NgramContext.EMPTY_PREV_WORDS_INFO;
                continue;
            }
            if (DEBUG_TOKEN) {
                Log.d(TAG, "--- word: \"" + token + "\"");
            }
            final WordInputEventForPersonalization event =
                    detectWhetherVaildWordOrNotAndGetInputEvent(
                            context, token, timestamp, locale, distracterFilter);
            if (event != null) {
                events.add(event);
                // The context is advanced with the token exactly as it appeared in the input.
                context = context.getNextNgramContext(new NgramContext.WordInfo(token));
            }
        }
        return events;
    }

    /**
     * Creates the event for a single word, or returns {@code null} when no event should be
     * recorded for it.
     *
     * @return {@code null} if {@code locale} is {@code null} or the filter reports the word as
     *         a distracter; otherwise a new event whose word is lower-cased when the handling
     *         type asks for it and whose validity flag is the negation of "handle as OOV"
     */
    private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent(
            final NgramContext ngramContext, final String targetWord, final int timestamp,
            final Locale locale, final DistracterFilter distracterFilter) {
        if (locale == null) {
            return null;
        }
        final int handlingType =
                distracterFilter.getWordHandlingType(ngramContext, targetWord, locale);
        final String wordToRecord;
        if (HandlingType.shouldBeLowerCased(handlingType)) {
            wordToRecord = targetWord.toLowerCase(locale);
        } else {
            wordToRecord = targetWord;
        }
        // The distracter check is made against the word as typed, not the lower-cased form.
        if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) {
            return null;
        }
        final boolean isValid = !HandlingType.shouldBeHandledAsOov(handlingType);
        return new WordInputEventForPersonalization(
                wordToRecord, ngramContext, isValid, timestamp);
    }
}