NgramContext.java revision 4466464c24d6c6523f170f56b7e65e43ceb699e2
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1683c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi
1783c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagipackage com.android.inputmethod.latin;
1883c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi
1986f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport android.text.TextUtils;
20e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
21e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagiimport com.android.inputmethod.latin.utils.StringUtils;
22e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
2386f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport java.util.Arrays;
2486f36003fd4397143bd37938dda029e5707634afYohei Yukawa
25a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi/**
26a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * Class to represent information of previous words. This class is used to add n-gram entries
27a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * into binary dictionaries, to get predictions, and to get suggestions.
28a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi */
2983c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagipublic class PrevWordsInfo {
30e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO =
31e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO);
321c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    public static final PrevWordsInfo BEGINNING_OF_SENTENCE =
331c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            new PrevWordsInfo(WordInfo.BEGINNING_OF_SENTENCE);
341adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi
35e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    /**
36e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi     * Word information used to represent previous words information.
37e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi     */
38e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public static class WordInfo {
39e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
40e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public static final WordInfo BEGINNING_OF_SENTENCE = new WordInfo();
41e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
4286f36003fd4397143bd37938dda029e5707634afYohei Yukawa        // This is an empty char sequence when mIsBeginningOfSentence is true.
4386f36003fd4397143bd37938dda029e5707634afYohei Yukawa        public final CharSequence mWord;
44e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // TODO: Have sentence separator.
45e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // Whether the current context is beginning of sentence or not. This is true when composing
46e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // at the beginning of an input field or composing a word after a sentence separator.
47e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public final boolean mIsBeginningOfSentence;
48e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
49e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // Beginning of sentence.
50e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public WordInfo() {
51e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mWord = "";
52e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mIsBeginningOfSentence = true;
53e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
54e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
5586f36003fd4397143bd37938dda029e5707634afYohei Yukawa        public WordInfo(final CharSequence word) {
56e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mWord = word;
57e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mIsBeginningOfSentence = false;
58e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
59e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
60e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public boolean isValid() {
61e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            return mWord != null;
62e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
631c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
641c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        @Override
651c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        public int hashCode() {
661c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
671c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        }
681c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
691c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        @Override
701c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        public boolean equals(Object o) {
711c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (this == o) return true;
721c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (!(o instanceof WordInfo)) return false;
731c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            final WordInfo wordInfo = (WordInfo)o;
741c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (mWord == null || wordInfo.mWord == null) {
751c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi                return mWord == wordInfo.mWord
761c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
771c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            }
7886f36003fd4397143bd37938dda029e5707634afYohei Yukawa            return TextUtils.equals(mWord, wordInfo.mWord)
791c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
801c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        }
81e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
8283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi
83e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
84e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // have any context for that previous word including the "beginning of sentence context" - we
85e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // just don't know what to predict using the information. An example of that is after a comma.
86e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
87e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // WordComposer was reset and before starting a new composing word, but we should never be
88e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // calling getSuggetions* in this situation.
894466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi    public final WordInfo[] mPrevWordsInfo;
9017f326b7458c2bde2569e283a96e703755485328Keisuke Kuroyanagi
91e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // Construct from the previous word information.
92e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public PrevWordsInfo(final WordInfo prevWordInfo) {
934466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        mPrevWordsInfo = new WordInfo[] { prevWordInfo };
94e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
95e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
96e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // Construct from WordInfo array. n-th element represents (n+1)-th previous word's information.
97e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public PrevWordsInfo(final WordInfo[] prevWordsInfo) {
984466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        mPrevWordsInfo = prevWordsInfo;
99e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
100e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
101e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // Create next prevWordsInfo using current prevWordsInfo.
102e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public PrevWordsInfo getNextPrevWordsInfo(final WordInfo wordInfo) {
1034466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
1044466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi                mPrevWordsInfo.length + 1);
1054466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
106e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        prevWordsInfo[0] = wordInfo;
1074466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, prevWordsInfo.length - 1);
108e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        return new PrevWordsInfo(prevWordsInfo);
10983c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi    }
1101adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi
1111adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi    public boolean isValid() {
1124466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        return mPrevWordsInfo.length > 0 && mPrevWordsInfo[0].isValid();
113e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
114e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
115e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public void outputToArray(final int[][] codePointArrays,
116e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            final boolean[] isBeginningOfSentenceArray) {
117e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        for (int i = 0; i < mPrevWordsInfo.length; i++) {
118e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            final WordInfo wordInfo = mPrevWordsInfo[i];
119e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            if (wordInfo == null || !wordInfo.isValid()) {
120e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                codePointArrays[i] = new int[0];
121e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                isBeginningOfSentenceArray[i] = false;
122e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                continue;
123e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            }
124e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
125e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
126e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
1271adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi    }
128dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi
1294466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi    public int getPrevWordCount() {
1304466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        return mPrevWordsInfo.length;
1314466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi    }
1324466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi
133dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    @Override
1341c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    public int hashCode() {
1351c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        return Arrays.hashCode(mPrevWordsInfo);
1361c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    }
1371c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
1381c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    @Override
1391c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    public boolean equals(Object o) {
1401c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        if (this == o) return true;
1411c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        if (!(o instanceof PrevWordsInfo)) return false;
1421c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        final PrevWordsInfo prevWordsInfo = (PrevWordsInfo)o;
1431c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        return Arrays.equals(mPrevWordsInfo, prevWordsInfo.mPrevWordsInfo);
1441c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    }
1451c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
1461c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    @Override
147dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    public String toString() {
148e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        final StringBuffer builder = new StringBuffer();
149e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        for (int i = 0; i < mPrevWordsInfo.length; i++) {
150e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            final WordInfo wordInfo = mPrevWordsInfo[i];
151e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append("PrevWord[");
152e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(i);
153e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append("]: ");
1541c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (wordInfo == null || !wordInfo.isValid()) {
155e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                builder.append("Empty. ");
156e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                continue;
157e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            }
158e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(wordInfo.mWord);
159e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(", isBeginningOfSentence: ");
160e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(wordInfo.mIsBeginningOfSentence);
161e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(". ");
162e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
163e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        return builder.toString();
164dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    }
16583c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi}
166