NgramContext.java revision 4466464c24d6c6523f170f56b7e65e43ceb699e2
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import com.android.inputmethod.latin.utils.StringUtils;

import java.util.Arrays;

/**
 * Class to represent information of previous words. This class is used to add n-gram entries
 * into binary dictionaries, to get predictions, and to get suggestions.
 */
public class PrevWordsInfo {
    public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO =
            new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO);
    public static final PrevWordsInfo BEGINNING_OF_SENTENCE =
            new PrevWordsInfo(WordInfo.BEGINNING_OF_SENTENCE);

    /**
     * Word information used to represent previous words information.
     */
    public static class WordInfo {
        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
        public static final WordInfo BEGINNING_OF_SENTENCE = new WordInfo();

        // This is an empty char sequence when mIsBeginningOfSentence is true.
        public final CharSequence mWord;
        // TODO: Have sentence separator.
        // Whether the current context is beginning of sentence or not. This is true when composing
        // at the beginning of an input field or composing a word after a sentence separator.
        public final boolean mIsBeginningOfSentence;

        /**
         * Creates the "beginning of sentence" marker: an empty word with
         * {@link #mIsBeginningOfSentence} set to {@code true}.
         */
        public WordInfo() {
            mWord = "";
            mIsBeginningOfSentence = true;
        }

        /**
         * Creates the information for an ordinary previous word.
         *
         * @param word the previous word; {@code null} yields an instance for which
         *        {@link #isValid()} returns {@code false}.
         */
        public WordInfo(final CharSequence word) {
            mWord = word;
            mIsBeginningOfSentence = false;
        }

        /**
         * @return {@code true} if this instance holds a non-null word.
         */
        public boolean isValid() {
            return mWord != null;
        }

        /**
         * Hashes the characters of {@code text} with the same formula as
         * {@link String#hashCode()}, so that the result depends only on the content and not on
         * the concrete {@link CharSequence} implementation.
         *
         * @param text the text to hash, may be {@code null}.
         * @return the content hash, or 0 when {@code text} is {@code null}.
         */
        private static int contentHashCode(final CharSequence text) {
            if (text == null) {
                return 0;
            }
            int hash = 0;
            final int length = text.length();
            for (int i = 0; i < length; i++) {
                hash = 31 * hash + text.charAt(i);
            }
            return hash;
        }

        @Override
        public int hashCode() {
            // equals() compares the word by content, so the hash has to be derived from the
            // content as well; otherwise two equal instances backed by different CharSequence
            // implementations could end up with different hash codes.
            return Arrays.hashCode(
                    new Object[] { contentHashCode(mWord), mIsBeginningOfSentence });
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (!(o instanceof WordInfo)) return false;
            final WordInfo wordInfo = (WordInfo)o;
            // TextUtils.equals() is null-safe: two nulls are equal, null never equals non-null.
            return mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence
                    && TextUtils.equals(mWord, wordInfo.mWord);
        }
    }

    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
    // have any context for that previous word including the "beginning of sentence context" - we
    // just don't know what to predict using the information. An example of that is after a comma.
    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
    // WordComposer was reset and before starting a new composing word, but we should never be
    // calling getSuggestions* in this situation.
    public final WordInfo[] mPrevWordsInfo;

    /**
     * Constructs a context that holds a single previous word.
     *
     * @param prevWordInfo the information of the word immediately before the considered word.
     */
    public PrevWordsInfo(final WordInfo prevWordInfo) {
        mPrevWordsInfo = new WordInfo[] { prevWordInfo };
    }

    /**
     * Constructs a context from a WordInfo array. The n-th element represents the (n+1)-th
     * previous word's information. The array is stored as is, without copying.
     *
     * @param prevWordsInfo the previous words, nearest word first.
     */
    public PrevWordsInfo(final WordInfo[] prevWordsInfo) {
        mPrevWordsInfo = prevWordsInfo;
    }

    /**
     * Creates the next context using the current one: {@code wordInfo} becomes the nearest
     * previous word and the current entries are shifted back by one. The result never holds
     * more than {@code Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM} entries; the oldest entries
     * are dropped when that limit would be exceeded.
     *
     * @param wordInfo the word that was just committed.
     * @return a new context; this instance is left unchanged.
     */
    public PrevWordsInfo getNextPrevWordsInfo(final WordInfo wordInfo) {
        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
                mPrevWordsInfo.length + 1);
        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
        prevWordsInfo[0] = wordInfo;
        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, prevWordsInfo.length - 1);
        return new PrevWordsInfo(prevWordsInfo);
    }

    /**
     * @return {@code true} if there is at least one previous word entry and the nearest one is
     *         non-null and valid.
     */
    public boolean isValid() {
        if (mPrevWordsInfo.length == 0) {
            return false;
        }
        // Null elements are tolerated by outputToArray() and toString(); treat them the same way
        // here instead of throwing a NullPointerException.
        final WordInfo firstWordInfo = mPrevWordsInfo[0];
        return firstWordInfo != null && firstWordInfo.isValid();
    }

    /**
     * Writes every previous word into the given arrays. Index {@code i} receives the code points
     * of the i-th entry and its beginning-of-sentence flag; a null or invalid entry is written as
     * an empty code point array and {@code false}. Both arrays need at least
     * {@link #getPrevWordCount()} elements; elements past that count are not touched.
     *
     * @param codePointArrays destination for the code points of each previous word.
     * @param isBeginningOfSentenceArray destination for the beginning-of-sentence flags.
     */
    public void outputToArray(final int[][] codePointArrays,
            final boolean[] isBeginningOfSentenceArray) {
        for (int i = 0; i < mPrevWordsInfo.length; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                codePointArrays[i] = new int[0];
                isBeginningOfSentenceArray[i] = false;
                continue;
            }
            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
        }
    }

    /**
     * @return the number of previous word entries held by this context.
     */
    public int getPrevWordCount() {
        return mPrevWordsInfo.length;
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(mPrevWordsInfo);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof PrevWordsInfo)) return false;
        final PrevWordsInfo prevWordsInfo = (PrevWordsInfo)o;
        return Arrays.equals(mPrevWordsInfo, prevWordsInfo.mPrevWordsInfo);
    }

    @Override
    public String toString() {
        // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
        final StringBuilder builder = new StringBuilder();
        for (int i = 0; i < mPrevWordsInfo.length; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            builder.append("PrevWord[");
            builder.append(i);
            builder.append("]: ");
            if (wordInfo == null || !wordInfo.isValid()) {
                builder.append("Empty. ");
                continue;
            }
            builder.append(wordInfo.mWord);
            builder.append(", isBeginningOfSentence: ");
            builder.append(wordInfo.mIsBeginningOfSentence);
            builder.append(". ");
        }
        return builder.toString();
    }
}