NgramContext.java revision 9290d0a4eba454b9b1501830a4e470005cc85332
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.Arrays;

/**
 * Class to represent information of previous words. This class is used to add n-gram entries
 * into binary dictionaries, to get predictions, and to get suggestions.
 */
public class NgramContext {
    public static final NgramContext EMPTY_PREV_WORDS_INFO =
            new NgramContext(WordInfo.EMPTY_WORD_INFO);
    public static final NgramContext BEGINNING_OF_SENTENCE =
            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE);

    /**
     * Word information used to represent previous words information.
     */
    public static class WordInfo {
        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
        public static final WordInfo BEGINNING_OF_SENTENCE = new WordInfo();

        // This is an empty char sequence when mIsBeginningOfSentence is true.
        public final CharSequence mWord;
        // TODO: Have sentence separator.
        // Whether the current context is beginning of sentence or not. This is true when composing
        // at the beginning of an input field or composing a word after a sentence separator.
        public final boolean mIsBeginningOfSentence;

        /**
         * Creates a "beginning of sentence" marker: the word is the empty string and
         * {@link #mIsBeginningOfSentence} is true.
         */
        public WordInfo() {
            mWord = "";
            mIsBeginningOfSentence = true;
        }

        /**
         * Creates word information for an ordinary previous word.
         *
         * @param word the previous word; {@code null} yields an invalid ("empty") WordInfo.
         */
        public WordInfo(final CharSequence word) {
            mWord = word;
            mIsBeginningOfSentence = false;
        }

        /**
         * @return true when this instance carries a word, i.e. {@link #mWord} is not null.
         */
        public boolean isValid() {
            return mWord != null;
        }

        /**
         * Hashes the textual content of the word together with the beginning-of-sentence flag.
         *
         * equals() compares words with TextUtils.equals, which (per the Android documentation)
         * compares character content rather than object identity or CharSequence type. Hashing
         * the CharSequence object itself would let two equal WordInfos (e.g. a String and a
         * StringBuilder holding the same text) produce different hash codes, so the String form
         * is hashed instead. For String words the resulting value is unchanged.
         */
        @Override
        public int hashCode() {
            final String wordText = (mWord == null) ? null : mWord.toString();
            return Arrays.hashCode(new Object[] { wordText, mIsBeginningOfSentence } );
        }

        /**
         * Two WordInfos are equal when their words have the same content (or are both null) and
         * their beginning-of-sentence flags match.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (!(o instanceof WordInfo)) return false;
            final WordInfo wordInfo = (WordInfo)o;
            if (mWord == null || wordInfo.mWord == null) {
                // At least one side has no word: equal only if both have none.
                return mWord == wordInfo.mWord
                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
            }
            return TextUtils.equals(mWord, wordInfo.mWord)
                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
        }
    }

    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
    // have any context for that previous word including the "beginning of sentence context" - we
    // just don't know what to predict using the information. An example of that is after a comma.
    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
    // WordComposer was reset and before starting a new composing word, but we should never be
    // calling getSuggestions* in this situation.
    private final WordInfo[] mPrevWordsInfo;
    // Number of leading entries of mPrevWordsInfo that belong to this context. It can be smaller
    // than mPrevWordsInfo.length when the instance was built by the private constructor.
    private final int mPrevWordsCount;

    /**
     * Constructs a context from the previous word information, most recent word first.
     * The array is stored as is (no copy), so the caller shouldn't change prevWordsInfo after
     * calling this constructor.
     *
     * @param prevWordsInfo the previous words; index 0 is the word immediately before the
     *        considered word.
     */
    public NgramContext(final WordInfo... prevWordsInfo) {
        mPrevWordsInfo = prevWordsInfo;
        mPrevWordsCount = prevWordsInfo.length;
    }

    /**
     * Constructs a context that shares the backing array of another context but only exposes
     * its first prevWordsCount entries.
     *
     * @param ngramContext the context whose word array is shared.
     * @param prevWordsCount the number of leading entries to expose.
     * @throws IndexOutOfBoundsException if prevWordsCount exceeds the count of ngramContext.
     */
    private NgramContext(final NgramContext ngramContext, final int prevWordsCount) {
        if (ngramContext.mPrevWordsCount < prevWordsCount) {
            throw new IndexOutOfBoundsException("ngramContext.mPrevWordsCount ("
                    + ngramContext.mPrevWordsCount + ") is smaller than prevWordsCount ("
                    + prevWordsCount + ")");
        }
        mPrevWordsInfo = ngramContext.mPrevWordsInfo;
        mPrevWordsCount = prevWordsCount;
    }

    /**
     * Creates the next context by putting wordInfo in front of the current previous words.
     * The result holds at most Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM entries; the oldest
     * entries are dropped when that limit would be exceeded.
     *
     * @param wordInfo the word that becomes the most recent previous word.
     * @return a new context; this instance is not modified.
     */
    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
                mPrevWordsCount + 1);
        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
        prevWordsInfo[0] = wordInfo;
        // Shift the existing words one slot towards the past.
        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
        return new NgramContext(prevWordsInfo);
    }

    /**
     * @return true when there is at least one previous word and the most recent one is valid.
     */
    public boolean isValid() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
    }

    /**
     * @return true when the most recent previous word is a beginning-of-sentence marker.
     */
    public boolean isBeginningOfSentenceContext() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
    }

    /**
     * Returns the n-th previous word. n is 1-indexed.
     *
     * @param n 1 for the word immediately before the considered word, 2 for the one before, ...
     * @return the word, or null when n is out of range (the word itself may also be null for
     *         an empty entry).
     */
    // TODO: Remove
    public CharSequence getNthPrevWord(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return null;
        }
        return mPrevWordsInfo[n - 1].mWord;
    }

    /**
     * Tells whether the n-th previous word is a beginning-of-sentence marker. n is 1-indexed.
     * The misspelling in the method name is kept so existing callers keep compiling.
     *
     * @param n 1 for the word immediately before the considered word, 2 for the one before, ...
     * @return the flag of that entry, or false when n is out of range.
     */
    @UsedForTesting
    public boolean isNthPrevWordBeginningOfSontence(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return false;
        }
        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
    }

    /**
     * Writes the previous words into the given parallel arrays, one slot per previous word.
     * Null or invalid entries are written as an empty code point array with a false flag.
     * Both arrays are indexed up to the previous word count, so they must be at least that long.
     *
     * @param codePointArrays receives the code points of each previous word.
     * @param isBeginningOfSentenceArray receives the beginning-of-sentence flag of each word.
     */
    public void outputToArray(final int[][] codePointArrays,
            final boolean[] isBeginningOfSentenceArray) {
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                codePointArrays[i] = new int[0];
                isBeginningOfSentenceArray[i] = false;
                continue;
            }
            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
        }
    }

    /**
     * @return the number of previous words held by this context.
     */
    public int getPrevWordCount() {
        return mPrevWordsCount;
    }

    // Null entries are treated exactly like EMPTY_WORD_INFO by equals() and hashCode().
    private static boolean isNullOrEmpty(final WordInfo wordInfo) {
        return wordInfo == null || WordInfo.EMPTY_WORD_INFO.equals(wordInfo);
    }

    /**
     * Hashes the leading run of non-empty previous words.
     *
     * equals() ignores trailing null/empty entries, so two equal contexts always share the same
     * entries up to their first null/empty one. Hashing only that run, and only within
     * mPrevWordsCount, keeps hashCode() consistent with equals().
     */
    @Override
    public int hashCode() {
        int hashValue = 0;
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (isNullOrEmpty(wordInfo)) {
                break;
            }
            hashValue = 31 * hashValue + wordInfo.hashCode();
        }
        return hashValue;
    }

    /**
     * Two contexts are equal when their previous words match position by position, where a
     * null entry counts as EMPTY_WORD_INFO and extra trailing null/empty entries on the longer
     * context are ignored.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof NgramContext)) return false;
        final NgramContext prevWordsInfo = (NgramContext)o;

        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
        for (int i = 0; i < minLength; i++) {
            final WordInfo thisWordInfo = mPrevWordsInfo[i];
            final WordInfo otherWordInfo = prevWordsInfo.mPrevWordsInfo[i];
            final boolean thisIsEmpty = isNullOrEmpty(thisWordInfo);
            if (thisIsEmpty != isNullOrEmpty(otherWordInfo)) {
                return false;
            }
            // Both are non-null here when thisIsEmpty is false.
            if (!thisIsEmpty && !thisWordInfo.equals(otherWordInfo)) {
                return false;
            }
        }
        final WordInfo[] longerWordsInfo;
        final int longerWordsInfoCount;
        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
            longerWordsInfo = mPrevWordsInfo;
            longerWordsInfoCount = mPrevWordsCount;
        } else {
            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
        }
        // Anything beyond the common prefix must carry no information.
        for (int i = minLength; i < longerWordsInfoCount; i++) {
            if (!isNullOrEmpty(longerWordsInfo[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return a human-readable dump of every previous word, for debugging.
     */
    @Override
    public String toString() {
        // Local, single-threaded use: StringBuilder avoids StringBuffer's synchronization.
        final StringBuilder builder = new StringBuilder();
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            builder.append("PrevWord[");
            builder.append(i);
            builder.append("]: ");
            if (wordInfo == null) {
                builder.append("null. ");
                continue;
            }
            if (!wordInfo.isValid()) {
                builder.append("Empty. ");
                continue;
            }
            builder.append(wordInfo.mWord);
            builder.append(", isBeginningOfSentence: ");
            builder.append(wordInfo.mIsBeginningOfSentence);
            builder.append(". ");
        }
        return builder.toString();
    }
}