NgramContext.java revision 9290d0a4eba454b9b1501830a4e470005cc85332
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20
21import com.android.inputmethod.annotations.UsedForTesting;
22import com.android.inputmethod.latin.utils.StringUtils;
23
24import java.util.Arrays;
25
26/**
27 * Class to represent information of previous words. This class is used to add n-gram entries
28 * into binary dictionaries, to get predictions, and to get suggestions.
29 */
30public class NgramContext {
31    public static final NgramContext EMPTY_PREV_WORDS_INFO =
32            new NgramContext(WordInfo.EMPTY_WORD_INFO);
33    public static final NgramContext BEGINNING_OF_SENTENCE =
34            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE);
35
36    /**
37     * Word information used to represent previous words information.
38     */
39    public static class WordInfo {
40        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
41        public static final WordInfo BEGINNING_OF_SENTENCE = new WordInfo();
42
43        // This is an empty char sequence when mIsBeginningOfSentence is true.
44        public final CharSequence mWord;
45        // TODO: Have sentence separator.
46        // Whether the current context is beginning of sentence or not. This is true when composing
47        // at the beginning of an input field or composing a word after a sentence separator.
48        public final boolean mIsBeginningOfSentence;
49
50        // Beginning of sentence.
51        public WordInfo() {
52            mWord = "";
53            mIsBeginningOfSentence = true;
54        }
55
56        public WordInfo(final CharSequence word) {
57            mWord = word;
58            mIsBeginningOfSentence = false;
59        }
60
61        public boolean isValid() {
62            return mWord != null;
63        }
64
65        @Override
66        public int hashCode() {
67            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
68        }
69
70        @Override
71        public boolean equals(Object o) {
72            if (this == o) return true;
73            if (!(o instanceof WordInfo)) return false;
74            final WordInfo wordInfo = (WordInfo)o;
75            if (mWord == null || wordInfo.mWord == null) {
76                return mWord == wordInfo.mWord
77                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
78            }
79            return TextUtils.equals(mWord, wordInfo.mWord)
80                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
81        }
82    }
83
84    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
85    // have any context for that previous word including the "beginning of sentence context" - we
86    // just don't know what to predict using the information. An example of that is after a comma.
87    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
88    // WordComposer was reset and before starting a new composing word, but we should never be
89    // calling getSuggetions* in this situation.
90    private final WordInfo[] mPrevWordsInfo;
91    private final int mPrevWordsCount;
92
93    // Construct from the previous word information.
94    public NgramContext(final WordInfo... prevWordsInfo) {
95        mPrevWordsInfo = prevWordsInfo;
96        mPrevWordsCount = prevWordsInfo.length;
97    }
98
99    // Construct from WordInfo array and size. The caller shouldn't change prevWordsInfo after
100    // calling this method.
101    private NgramContext(final NgramContext ngramContext, final int prevWordsCount) {
102        if (ngramContext.mPrevWordsCount < prevWordsCount) {
103            throw new IndexOutOfBoundsException("ngramContext.mPrevWordsCount ("
104                    + ngramContext.mPrevWordsCount + ") is smaller than prevWordsCount ("
105                    + prevWordsCount + ")");
106        }
107        mPrevWordsInfo = ngramContext.mPrevWordsInfo;
108        mPrevWordsCount = prevWordsCount;
109    }
110
111    // Create next prevWordsInfo using current prevWordsInfo.
112    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
113        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
114                mPrevWordsCount + 1);
115        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
116        prevWordsInfo[0] = wordInfo;
117        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
118        return new NgramContext(prevWordsInfo);
119    }
120
121    public boolean isValid() {
122        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
123    }
124
125    public boolean isBeginningOfSentenceContext() {
126        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
127    }
128
129    // n is 1-indexed.
130    // TODO: Remove
131    public CharSequence getNthPrevWord(final int n) {
132        if (n <= 0 || n > mPrevWordsCount) {
133            return null;
134        }
135        return mPrevWordsInfo[n - 1].mWord;
136    }
137
138    // n is 1-indexed.
139    @UsedForTesting
140    public boolean isNthPrevWordBeginningOfSontence(final int n) {
141        if (n <= 0 || n > mPrevWordsCount) {
142            return false;
143        }
144        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
145    }
146
147    public void outputToArray(final int[][] codePointArrays,
148            final boolean[] isBeginningOfSentenceArray) {
149        for (int i = 0; i < mPrevWordsCount; i++) {
150            final WordInfo wordInfo = mPrevWordsInfo[i];
151            if (wordInfo == null || !wordInfo.isValid()) {
152                codePointArrays[i] = new int[0];
153                isBeginningOfSentenceArray[i] = false;
154                continue;
155            }
156            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
157            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
158        }
159    }
160
161    public int getPrevWordCount() {
162        return mPrevWordsCount;
163    }
164
165    @Override
166    public int hashCode() {
167        int hashValue = 0;
168        for (final WordInfo wordInfo : mPrevWordsInfo) {
169            if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
170                break;
171            }
172            hashValue ^= wordInfo.hashCode();
173        }
174        return hashValue;
175    }
176
177    @Override
178    public boolean equals(Object o) {
179        if (this == o) return true;
180        if (!(o instanceof NgramContext)) return false;
181        final NgramContext prevWordsInfo = (NgramContext)o;
182
183        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
184        for (int i = 0; i < minLength; i++) {
185            if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
186                return false;
187            }
188        }
189        final WordInfo[] longerWordsInfo;
190        final int longerWordsInfoCount;
191        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
192            longerWordsInfo = mPrevWordsInfo;
193            longerWordsInfoCount = mPrevWordsCount;
194        } else {
195            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
196            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
197        }
198        for (int i = minLength; i < longerWordsInfoCount; i++) {
199            if (longerWordsInfo[i] != null
200                    && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
201                return false;
202            }
203        }
204        return true;
205    }
206
207    @Override
208    public String toString() {
209        final StringBuffer builder = new StringBuffer();
210        for (int i = 0; i < mPrevWordsCount; i++) {
211            final WordInfo wordInfo = mPrevWordsInfo[i];
212            builder.append("PrevWord[");
213            builder.append(i);
214            builder.append("]: ");
215            if (wordInfo == null) {
216                builder.append("null. ");
217                continue;
218            }
219            if (!wordInfo.isValid()) {
220                builder.append("Empty. ");
221                continue;
222            }
223            builder.append(wordInfo.mWord);
224            builder.append(", isBeginningOfSentence: ");
225            builder.append(wordInfo.mIsBeginningOfSentence);
226            builder.append(". ");
227        }
228        return builder.toString();
229    }
230}
231