NgramContext.java revision d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20
21import com.android.inputmethod.annotations.UsedForTesting;
22import com.android.inputmethod.latin.utils.StringUtils;
23
24import java.util.Arrays;
25
26import javax.annotation.Nonnull;
27
28/**
29 * Class to represent information of previous words. This class is used to add n-gram entries
30 * into binary dictionaries, to get predictions, and to get suggestions.
31 */
32public class NgramContext {
33    @Nonnull
34    public static final NgramContext EMPTY_PREV_WORDS_INFO =
35            new NgramContext(WordInfo.EMPTY_WORD_INFO);
36    @Nonnull
37    public static final NgramContext BEGINNING_OF_SENTENCE =
38            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
39
40    /**
41     * Word information used to represent previous words information.
42     */
43    public static class WordInfo {
44        @Nonnull
45        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
46        @Nonnull
47        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
48
49        // This is an empty char sequence when mIsBeginningOfSentence is true.
50        public final CharSequence mWord;
51        // TODO: Have sentence separator.
52        // Whether the current context is beginning of sentence or not. This is true when composing
53        // at the beginning of an input field or composing a word after a sentence separator.
54        public final boolean mIsBeginningOfSentence;
55
56        // Beginning of sentence.
57        private WordInfo() {
58            mWord = "";
59            mIsBeginningOfSentence = true;
60        }
61
62        public WordInfo(final CharSequence word) {
63            mWord = word;
64            mIsBeginningOfSentence = false;
65        }
66
67        public boolean isValid() {
68            return mWord != null;
69        }
70
71        @Override
72        public int hashCode() {
73            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
74        }
75
76        @Override
77        public boolean equals(Object o) {
78            if (this == o) return true;
79            if (!(o instanceof WordInfo)) return false;
80            final WordInfo wordInfo = (WordInfo)o;
81            if (mWord == null || wordInfo.mWord == null) {
82                return mWord == wordInfo.mWord
83                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
84            }
85            return TextUtils.equals(mWord, wordInfo.mWord)
86                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
87        }
88    }
89
90    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
91    // have any context for that previous word including the "beginning of sentence context" - we
92    // just don't know what to predict using the information. An example of that is after a comma.
93    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
94    // WordComposer was reset and before starting a new composing word, but we should never be
95    // calling getSuggetions* in this situation.
96    private final WordInfo[] mPrevWordsInfo;
97    private final int mPrevWordsCount;
98
99    // Construct from the previous word information.
100    public NgramContext(final WordInfo... prevWordsInfo) {
101        mPrevWordsInfo = prevWordsInfo;
102        mPrevWordsCount = prevWordsInfo.length;
103    }
104
105    // Create next prevWordsInfo using current prevWordsInfo.
106    @Nonnull
107    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
108        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
109                mPrevWordsCount + 1);
110        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
111        prevWordsInfo[0] = wordInfo;
112        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
113        return new NgramContext(prevWordsInfo);
114    }
115
116    public boolean isValid() {
117        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
118    }
119
120    public boolean isBeginningOfSentenceContext() {
121        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
122    }
123
124    // n is 1-indexed.
125    // TODO: Remove
126    public CharSequence getNthPrevWord(final int n) {
127        if (n <= 0 || n > mPrevWordsCount) {
128            return null;
129        }
130        return mPrevWordsInfo[n - 1].mWord;
131    }
132
133    // n is 1-indexed.
134    @UsedForTesting
135    public boolean isNthPrevWordBeginningOfSontence(final int n) {
136        if (n <= 0 || n > mPrevWordsCount) {
137            return false;
138        }
139        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
140    }
141
142    public void outputToArray(final int[][] codePointArrays,
143            final boolean[] isBeginningOfSentenceArray) {
144        for (int i = 0; i < mPrevWordsCount; i++) {
145            final WordInfo wordInfo = mPrevWordsInfo[i];
146            if (wordInfo == null || !wordInfo.isValid()) {
147                codePointArrays[i] = new int[0];
148                isBeginningOfSentenceArray[i] = false;
149                continue;
150            }
151            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
152            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
153        }
154    }
155
156    public int getPrevWordCount() {
157        return mPrevWordsCount;
158    }
159
160    @Override
161    public int hashCode() {
162        int hashValue = 0;
163        for (final WordInfo wordInfo : mPrevWordsInfo) {
164            if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
165                break;
166            }
167            hashValue ^= wordInfo.hashCode();
168        }
169        return hashValue;
170    }
171
172    @Override
173    public boolean equals(Object o) {
174        if (this == o) return true;
175        if (!(o instanceof NgramContext)) return false;
176        final NgramContext prevWordsInfo = (NgramContext)o;
177
178        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
179        for (int i = 0; i < minLength; i++) {
180            if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
181                return false;
182            }
183        }
184        final WordInfo[] longerWordsInfo;
185        final int longerWordsInfoCount;
186        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
187            longerWordsInfo = mPrevWordsInfo;
188            longerWordsInfoCount = mPrevWordsCount;
189        } else {
190            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
191            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
192        }
193        for (int i = minLength; i < longerWordsInfoCount; i++) {
194            if (longerWordsInfo[i] != null
195                    && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
196                return false;
197            }
198        }
199        return true;
200    }
201
202    @Override
203    public String toString() {
204        final StringBuffer builder = new StringBuffer();
205        for (int i = 0; i < mPrevWordsCount; i++) {
206            final WordInfo wordInfo = mPrevWordsInfo[i];
207            builder.append("PrevWord[");
208            builder.append(i);
209            builder.append("]: ");
210            if (wordInfo == null) {
211                builder.append("null. ");
212                continue;
213            }
214            if (!wordInfo.isValid()) {
215                builder.append("Empty. ");
216                continue;
217            }
218            builder.append(wordInfo.mWord);
219            builder.append(", isBeginningOfSentence: ");
220            builder.append(wordInfo.mIsBeginningOfSentence);
221            builder.append(". ");
222        }
223        return builder.toString();
224    }
225}
226