WordInputEventForPersonalization.java revision 0f7d881dc72132dfd75c8b4fe61a69fc5cdcd460
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin.utils; 18 19import android.util.Log; 20 21import com.android.inputmethod.annotations.UsedForTesting; 22import com.android.inputmethod.latin.NgramContext; 23import com.android.inputmethod.latin.common.StringUtils; 24import com.android.inputmethod.latin.define.DecoderSpecificConstants; 25import com.android.inputmethod.latin.settings.SpacingAndPunctuations; 26import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; 27 28import java.util.ArrayList; 29import java.util.List; 30import java.util.Locale; 31 32// Note: this class is used as a parameter type of a native method. You should be careful when you 33// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative(). 34public final class WordInputEventForPersonalization { 35 private static final String TAG = WordInputEventForPersonalization.class.getSimpleName(); 36 private static final boolean DEBUG_TOKEN = false; 37 38 public final int[] mTargetWord; 39 public final int mPrevWordsCount; 40 public final int[][] mPrevWordArray = 41 new int[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; 42 public final boolean[] mIsPrevWordBeginningOfSentenceArray = 43 new boolean[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 44 public final boolean mIsValid; 45 // Time stamp in seconds. 46 public final int mTimestamp; 47 48 @UsedForTesting 49 public WordInputEventForPersonalization(final CharSequence targetWord, 50 final NgramContext ngramContext, final boolean isValid, final int timestamp) { 51 mTargetWord = StringUtils.toCodePointArray(targetWord); 52 mPrevWordsCount = ngramContext.getPrevWordCount(); 53 ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray); 54 mIsValid = isValid; 55 mTimestamp = timestamp; 56 } 57 58 // Process a list of words and return a list of {@link WordInputEventForPersonalization} 59 // objects. 60 public static ArrayList<WordInputEventForPersonalization> createInputEventFrom( 61 final List<String> tokens, final int timestamp, 62 final SpacingAndPunctuations spacingAndPunctuations, final Locale locale, 63 final DistracterFilter distracterFilter) { 64 final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>(); 65 final int N = tokens.size(); 66 NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; 67 for (int i = 0; i < N; ++i) { 68 final String tempWord = tokens.get(i); 69 if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) { 70 // just skip this token 71 if (DEBUG_TOKEN) { 72 Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\""); 73 } 74 continue; 75 } 76 if (!DictionaryInfoUtils.looksValidForDictionaryInsertion( 77 tempWord, spacingAndPunctuations)) { 78 if (DEBUG_TOKEN) { 79 Log.d(TAG, "--- not looksValidForDictionaryInsertion: \"" 80 + tempWord + "\""); 81 } 82 // Sentence terminator found. Split. 83 // TODO: Detect whether the context is beginning-of-sentence. 84 ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; 85 continue; 86 } 87 if (DEBUG_TOKEN) { 88 Log.d(TAG, "--- word: \"" + tempWord + "\""); 89 } 90 final WordInputEventForPersonalization inputEvent = 91 detectWhetherVaildWordOrNotAndGetInputEvent( 92 ngramContext, tempWord, timestamp, locale, distracterFilter); 93 if (inputEvent == null) { 94 continue; 95 } 96 inputEvents.add(inputEvent); 97 ngramContext = ngramContext.getNextNgramContext(new NgramContext.WordInfo(tempWord)); 98 } 99 return inputEvents; 100 } 101 102 private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent( 103 final NgramContext ngramContext, final String targetWord, final int timestamp, 104 final Locale locale, final DistracterFilter distracterFilter) { 105 if (locale == null) { 106 return null; 107 } 108 final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext, 109 targetWord, locale); 110 final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? 111 targetWord.toLowerCase(locale) : targetWord; 112 if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) { 113 // The word is a distracter. 114 return null; 115 } 116 return new WordInputEventForPersonalization(word, ngramContext, 117 !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp); 118 } 119} 120