NgramContext.java revision 5f00fe09e9a611b647592188316e5999465df4d3
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.Arrays;

/**
 * Class to represent information of previous words. This class is used to add n-gram entries
 * into binary dictionaries, to get predictions, and to get suggestions.
 *
 * Index 0 always holds the word immediately before the considered word; higher indices hold
 * progressively older words.
 */
public class NgramContext {
    public static final NgramContext EMPTY_PREV_WORDS_INFO =
            new NgramContext(WordInfo.EMPTY_WORD_INFO);
    public static final NgramContext BEGINNING_OF_SENTENCE =
            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);

    /**
     * Word information used to represent previous words information.
     */
    public static class WordInfo {
        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();

        // This is an empty char sequence when mIsBeginningOfSentence is true.
        public final CharSequence mWord;
        // TODO: Have sentence separator.
        // Whether the current context is beginning of sentence or not. This is true when composing
        // at the beginning of an input field or composing a word after a sentence separator.
        public final boolean mIsBeginningOfSentence;

        // Beginning of sentence.
        private WordInfo() {
            mWord = "";
            mIsBeginningOfSentence = true;
        }

        /**
         * Creates the information for a regular (non beginning-of-sentence) previous word.
         *
         * @param word the previous word, or null to represent "no information".
         */
        public WordInfo(final CharSequence word) {
            mWord = word;
            mIsBeginningOfSentence = false;
        }

        /**
         * @return true if this instance carries a word (possibly the empty beginning-of-sentence
         * word), false if it is an "empty" placeholder whose word is null.
         */
        public boolean isValid() {
            return mWord != null;
        }

        @Override
        public int hashCode() {
            // equals() compares the word by content, so the hash must also be computed from the
            // content. Hashing the CharSequence object itself would give different values for
            // e.g. a String and a StringBuilder holding the same characters.
            final String word = (mWord == null) ? null : mWord.toString();
            return Arrays.hashCode(new Object[] { word, mIsBeginningOfSentence } );
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (!(o instanceof WordInfo)) return false;
            final WordInfo wordInfo = (WordInfo)o;
            // TextUtils.equals() is null-safe and compares the character content.
            return TextUtils.equals(mWord, wordInfo.mWord)
                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
        }
    }

    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
    // have any context for that previous word including the "beginning of sentence context" - we
    // just don't know what to predict using the information. An example of that is after a comma.
    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
    // WordComposer was reset and before starting a new composing word, but we should never be
    // calling getSuggestions* in this situation.
    private final WordInfo[] mPrevWordsInfo;
    private final int mPrevWordsCount;

    /**
     * Construct from the previous word information.
     *
     * @param prevWordsInfo previous words, most recent first. The array is stored as is (it is
     * not copied), so callers must not modify it afterwards.
     */
    public NgramContext(final WordInfo... prevWordsInfo) {
        mPrevWordsInfo = prevWordsInfo;
        mPrevWordsCount = prevWordsInfo.length;
    }

    /**
     * Create next prevWordsInfo using current prevWordsInfo.
     *
     * @param wordInfo the word that becomes the most recent previous word.
     * @return a new context with {@code wordInfo} at index 0 followed by the current words,
     * truncated to at most {@code Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM} entries.
     */
    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
                mPrevWordsCount + 1);
        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
        prevWordsInfo[0] = wordInfo;
        // Shift the existing words by one; the oldest one is dropped when the limit is reached.
        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
        return new NgramContext(prevWordsInfo);
    }

    /**
     * @return true if there is at least one previous word and the most recent one is valid.
     */
    public boolean isValid() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
    }

    /**
     * @return true if the most recent previous word is a beginning-of-sentence marker.
     */
    public boolean isBeginningOfSentenceContext() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
    }

    // n is 1-indexed.
    // TODO: Remove
    /**
     * @param n 1-indexed position of the previous word (1 is the most recent one).
     * @return the n-th previous word, or null if n is out of range.
     */
    public CharSequence getNthPrevWord(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return null;
        }
        return mPrevWordsInfo[n - 1].mWord;
    }

    // n is 1-indexed.
    // Note: the misspelling ("Sontence") is part of the existing method name and is kept so that
    // current callers keep compiling.
    /**
     * @param n 1-indexed position of the previous word (1 is the most recent one).
     * @return whether the n-th previous word is a beginning-of-sentence marker; false if n is
     * out of range.
     */
    @UsedForTesting
    public boolean isNthPrevWordBeginningOfSontence(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return false;
        }
        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
    }

    /**
     * Writes the previous words into the given output arrays, index by index.
     *
     * Null or invalid entries are written as an empty code point array with the
     * beginning-of-sentence flag cleared. Both arrays need at least
     * {@link #getPrevWordCount()} elements; this method does not check their length.
     *
     * @param codePointArrays receives the code points of each previous word.
     * @param isBeginningOfSentenceArray receives the beginning-of-sentence flag of each word.
     */
    public void outputToArray(final int[][] codePointArrays,
            final boolean[] isBeginningOfSentenceArray) {
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                codePointArrays[i] = new int[0];
                isBeginningOfSentenceArray[i] = false;
                continue;
            }
            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
        }
    }

    /**
     * @return the number of previous word slots held by this context.
     */
    public int getPrevWordCount() {
        return mPrevWordsCount;
    }

    // Treats a null slot as EMPTY_WORD_INFO so that equals() and hashCode() agree on what
    // "no information" means.
    private static WordInfo emptyIfNull(final WordInfo wordInfo) {
        return (wordInfo == null) ? WordInfo.EMPTY_WORD_INFO : wordInfo;
    }

    @Override
    public int hashCode() {
        int hashValue = 0;
        for (final WordInfo wordInfo : mPrevWordsInfo) {
            // Stop at the first slot without information. equals() ignores trailing empty
            // slots, so they must not contribute to the hash; everything before the first
            // empty slot is necessarily identical between two equal contexts.
            if (WordInfo.EMPTY_WORD_INFO.equals(emptyIfNull(wordInfo))) {
                break;
            }
            // Order-sensitive combination, so that swapped or repeated words do not collide.
            hashValue = 31 * hashValue + wordInfo.hashCode();
        }
        return hashValue;
    }

    /**
     * Two contexts are equal when their words match position by position, where trailing
     * slots without information (null or EMPTY_WORD_INFO) are ignored.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof NgramContext)) return false;
        final NgramContext prevWordsInfo = (NgramContext)o;

        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
        for (int i = 0; i < minLength; i++) {
            // Normalize null slots first; calling equals() on them directly would throw.
            final WordInfo thisWordInfo = emptyIfNull(mPrevWordsInfo[i]);
            final WordInfo otherWordInfo = emptyIfNull(prevWordsInfo.mPrevWordsInfo[i]);
            if (!thisWordInfo.equals(otherWordInfo)) {
                return false;
            }
        }
        final WordInfo[] longerWordsInfo;
        final int longerWordsInfoCount;
        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
            longerWordsInfo = mPrevWordsInfo;
            longerWordsInfoCount = mPrevWordsCount;
        } else {
            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
        }
        // The extra slots of the longer context may only hold "no information" entries.
        for (int i = minLength; i < longerWordsInfoCount; i++) {
            if (!WordInfo.EMPTY_WORD_INFO.equals(emptyIfNull(longerWordsInfo[i]))) {
                return false;
            }
        }
        return true;
    }

    @Override
    public String toString() {
        // The builder never leaves this method, so the unsynchronized StringBuilder suffices.
        final StringBuilder builder = new StringBuilder();
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            builder.append("PrevWord[");
            builder.append(i);
            builder.append("]: ");
            if (wordInfo == null) {
                builder.append("null. ");
                continue;
            }
            if (!wordInfo.isValid()) {
                builder.append("Empty. ");
                continue;
            }
            builder.append(wordInfo.mWord);
            builder.append(", isBeginningOfSentence: ");
            builder.append(wordInfo.mIsBeginningOfSentence);
            builder.append(". ");
        }
        return builder.toString();
    }
}