NgramContext.java revision 5f00fe09e9a611b647592188316e5999465df4d3
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20
21import com.android.inputmethod.annotations.UsedForTesting;
22import com.android.inputmethod.latin.utils.StringUtils;
23
24import java.util.Arrays;
25
26/**
27 * Class to represent information of previous words. This class is used to add n-gram entries
28 * into binary dictionaries, to get predictions, and to get suggestions.
29 */
30public class NgramContext {
31    public static final NgramContext EMPTY_PREV_WORDS_INFO =
32            new NgramContext(WordInfo.EMPTY_WORD_INFO);
33    public static final NgramContext BEGINNING_OF_SENTENCE =
34            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
35
36    /**
37     * Word information used to represent previous words information.
38     */
39    public static class WordInfo {
40        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
41        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
42
43        // This is an empty char sequence when mIsBeginningOfSentence is true.
44        public final CharSequence mWord;
45        // TODO: Have sentence separator.
46        // Whether the current context is beginning of sentence or not. This is true when composing
47        // at the beginning of an input field or composing a word after a sentence separator.
48        public final boolean mIsBeginningOfSentence;
49
50        // Beginning of sentence.
51        private WordInfo() {
52            mWord = "";
53            mIsBeginningOfSentence = true;
54        }
55
56        public WordInfo(final CharSequence word) {
57            mWord = word;
58            mIsBeginningOfSentence = false;
59        }
60
61        public boolean isValid() {
62            return mWord != null;
63        }
64
65        @Override
66        public int hashCode() {
67            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
68        }
69
70        @Override
71        public boolean equals(Object o) {
72            if (this == o) return true;
73            if (!(o instanceof WordInfo)) return false;
74            final WordInfo wordInfo = (WordInfo)o;
75            if (mWord == null || wordInfo.mWord == null) {
76                return mWord == wordInfo.mWord
77                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
78            }
79            return TextUtils.equals(mWord, wordInfo.mWord)
80                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
81        }
82    }
83
84    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
85    // have any context for that previous word including the "beginning of sentence context" - we
86    // just don't know what to predict using the information. An example of that is after a comma.
87    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
88    // WordComposer was reset and before starting a new composing word, but we should never be
89    // calling getSuggetions* in this situation.
90    private final WordInfo[] mPrevWordsInfo;
91    private final int mPrevWordsCount;
92
93    // Construct from the previous word information.
94    public NgramContext(final WordInfo... prevWordsInfo) {
95        mPrevWordsInfo = prevWordsInfo;
96        mPrevWordsCount = prevWordsInfo.length;
97    }
98
99    // Create next prevWordsInfo using current prevWordsInfo.
100    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
101        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
102                mPrevWordsCount + 1);
103        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
104        prevWordsInfo[0] = wordInfo;
105        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
106        return new NgramContext(prevWordsInfo);
107    }
108
109    public boolean isValid() {
110        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
111    }
112
113    public boolean isBeginningOfSentenceContext() {
114        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
115    }
116
117    // n is 1-indexed.
118    // TODO: Remove
119    public CharSequence getNthPrevWord(final int n) {
120        if (n <= 0 || n > mPrevWordsCount) {
121            return null;
122        }
123        return mPrevWordsInfo[n - 1].mWord;
124    }
125
126    // n is 1-indexed.
127    @UsedForTesting
128    public boolean isNthPrevWordBeginningOfSontence(final int n) {
129        if (n <= 0 || n > mPrevWordsCount) {
130            return false;
131        }
132        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
133    }
134
135    public void outputToArray(final int[][] codePointArrays,
136            final boolean[] isBeginningOfSentenceArray) {
137        for (int i = 0; i < mPrevWordsCount; i++) {
138            final WordInfo wordInfo = mPrevWordsInfo[i];
139            if (wordInfo == null || !wordInfo.isValid()) {
140                codePointArrays[i] = new int[0];
141                isBeginningOfSentenceArray[i] = false;
142                continue;
143            }
144            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
145            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
146        }
147    }
148
149    public int getPrevWordCount() {
150        return mPrevWordsCount;
151    }
152
153    @Override
154    public int hashCode() {
155        int hashValue = 0;
156        for (final WordInfo wordInfo : mPrevWordsInfo) {
157            if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
158                break;
159            }
160            hashValue ^= wordInfo.hashCode();
161        }
162        return hashValue;
163    }
164
165    @Override
166    public boolean equals(Object o) {
167        if (this == o) return true;
168        if (!(o instanceof NgramContext)) return false;
169        final NgramContext prevWordsInfo = (NgramContext)o;
170
171        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
172        for (int i = 0; i < minLength; i++) {
173            if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
174                return false;
175            }
176        }
177        final WordInfo[] longerWordsInfo;
178        final int longerWordsInfoCount;
179        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
180            longerWordsInfo = mPrevWordsInfo;
181            longerWordsInfoCount = mPrevWordsCount;
182        } else {
183            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
184            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
185        }
186        for (int i = minLength; i < longerWordsInfoCount; i++) {
187            if (longerWordsInfo[i] != null
188                    && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
189                return false;
190            }
191        }
192        return true;
193    }
194
195    @Override
196    public String toString() {
197        final StringBuffer builder = new StringBuffer();
198        for (int i = 0; i < mPrevWordsCount; i++) {
199            final WordInfo wordInfo = mPrevWordsInfo[i];
200            builder.append("PrevWord[");
201            builder.append(i);
202            builder.append("]: ");
203            if (wordInfo == null) {
204                builder.append("null. ");
205                continue;
206            }
207            if (!wordInfo.isValid()) {
208                builder.append("Empty. ");
209                continue;
210            }
211            builder.append(wordInfo.mWord);
212            builder.append(", isBeginningOfSentence: ");
213            builder.append(wordInfo.mIsBeginningOfSentence);
214            builder.append(". ");
215        }
216        return builder.toString();
217    }
218}
219