NgramContext.java revision d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.text.TextUtils; 20 21import com.android.inputmethod.annotations.UsedForTesting; 22import com.android.inputmethod.latin.utils.StringUtils; 23 24import java.util.Arrays; 25 26import javax.annotation.Nonnull; 27 28/** 29 * Class to represent information of previous words. This class is used to add n-gram entries 30 * into binary dictionaries, to get predictions, and to get suggestions. 31 */ 32public class NgramContext { 33 @Nonnull 34 public static final NgramContext EMPTY_PREV_WORDS_INFO = 35 new NgramContext(WordInfo.EMPTY_WORD_INFO); 36 @Nonnull 37 public static final NgramContext BEGINNING_OF_SENTENCE = 38 new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO); 39 40 /** 41 * Word information used to represent previous words information. 42 */ 43 public static class WordInfo { 44 @Nonnull 45 public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null); 46 @Nonnull 47 public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo(); 48 49 // This is an empty char sequence when mIsBeginningOfSentence is true. 50 public final CharSequence mWord; 51 // TODO: Have sentence separator. 52 // Whether the current context is beginning of sentence or not. This is true when composing 53 // at the beginning of an input field or composing a word after a sentence separator. 54 public final boolean mIsBeginningOfSentence; 55 56 // Beginning of sentence. 57 private WordInfo() { 58 mWord = ""; 59 mIsBeginningOfSentence = true; 60 } 61 62 public WordInfo(final CharSequence word) { 63 mWord = word; 64 mIsBeginningOfSentence = false; 65 } 66 67 public boolean isValid() { 68 return mWord != null; 69 } 70 71 @Override 72 public int hashCode() { 73 return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } ); 74 } 75 76 @Override 77 public boolean equals(Object o) { 78 if (this == o) return true; 79 if (!(o instanceof WordInfo)) return false; 80 final WordInfo wordInfo = (WordInfo)o; 81 if (mWord == null || wordInfo.mWord == null) { 82 return mWord == wordInfo.mWord 83 && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; 84 } 85 return TextUtils.equals(mWord, wordInfo.mWord) 86 && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; 87 } 88 } 89 90 // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't 91 // have any context for that previous word including the "beginning of sentence context" - we 92 // just don't know what to predict using the information. An example of that is after a comma. 93 // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the 94 // WordComposer was reset and before starting a new composing word, but we should never be 95 // calling getSuggetions* in this situation. 96 private final WordInfo[] mPrevWordsInfo; 97 private final int mPrevWordsCount; 98 99 // Construct from the previous word information. 100 public NgramContext(final WordInfo... prevWordsInfo) { 101 mPrevWordsInfo = prevWordsInfo; 102 mPrevWordsCount = prevWordsInfo.length; 103 } 104 105 // Create next prevWordsInfo using current prevWordsInfo. 106 @Nonnull 107 public NgramContext getNextNgramContext(final WordInfo wordInfo) { 108 final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, 109 mPrevWordsCount + 1); 110 final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; 111 prevWordsInfo[0] = wordInfo; 112 System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); 113 return new NgramContext(prevWordsInfo); 114 } 115 116 public boolean isValid() { 117 return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid(); 118 } 119 120 public boolean isBeginningOfSentenceContext() { 121 return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence; 122 } 123 124 // n is 1-indexed. 125 // TODO: Remove 126 public CharSequence getNthPrevWord(final int n) { 127 if (n <= 0 || n > mPrevWordsCount) { 128 return null; 129 } 130 return mPrevWordsInfo[n - 1].mWord; 131 } 132 133 // n is 1-indexed. 134 @UsedForTesting 135 public boolean isNthPrevWordBeginningOfSontence(final int n) { 136 if (n <= 0 || n > mPrevWordsCount) { 137 return false; 138 } 139 return mPrevWordsInfo[n - 1].mIsBeginningOfSentence; 140 } 141 142 public void outputToArray(final int[][] codePointArrays, 143 final boolean[] isBeginningOfSentenceArray) { 144 for (int i = 0; i < mPrevWordsCount; i++) { 145 final WordInfo wordInfo = mPrevWordsInfo[i]; 146 if (wordInfo == null || !wordInfo.isValid()) { 147 codePointArrays[i] = new int[0]; 148 isBeginningOfSentenceArray[i] = false; 149 continue; 150 } 151 codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord); 152 isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence; 153 } 154 } 155 156 public int getPrevWordCount() { 157 return mPrevWordsCount; 158 } 159 160 @Override 161 public int hashCode() { 162 int hashValue = 0; 163 for (final WordInfo wordInfo : mPrevWordsInfo) { 164 if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) { 165 break; 166 } 167 hashValue ^= wordInfo.hashCode(); 168 } 169 return hashValue; 170 } 171 172 @Override 173 public boolean equals(Object o) { 174 if (this == o) return true; 175 if (!(o instanceof NgramContext)) return false; 176 final NgramContext prevWordsInfo = (NgramContext)o; 177 178 final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount); 179 for (int i = 0; i < minLength; i++) { 180 if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) { 181 return false; 182 } 183 } 184 final WordInfo[] longerWordsInfo; 185 final int longerWordsInfoCount; 186 if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) { 187 longerWordsInfo = mPrevWordsInfo; 188 longerWordsInfoCount = mPrevWordsCount; 189 } else { 190 longerWordsInfo = prevWordsInfo.mPrevWordsInfo; 191 longerWordsInfoCount = prevWordsInfo.mPrevWordsCount; 192 } 193 for (int i = minLength; i < longerWordsInfoCount; i++) { 194 if (longerWordsInfo[i] != null 195 && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) { 196 return false; 197 } 198 } 199 return true; 200 } 201 202 @Override 203 public String toString() { 204 final StringBuffer builder = new StringBuffer(); 205 for (int i = 0; i < mPrevWordsCount; i++) { 206 final WordInfo wordInfo = mPrevWordsInfo[i]; 207 builder.append("PrevWord["); 208 builder.append(i); 209 builder.append("]: "); 210 if (wordInfo == null) { 211 builder.append("null. "); 212 continue; 213 } 214 if (!wordInfo.isValid()) { 215 builder.append("Empty. "); 216 continue; 217 } 218 builder.append(wordInfo.mWord); 219 builder.append(", isBeginningOfSentence: "); 220 builder.append(wordInfo.mIsBeginningOfSentence); 221 builder.append(". "); 222 } 223 return builder.toString(); 224 } 225} 226