1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20
21import com.android.inputmethod.annotations.UsedForTesting;
22import com.android.inputmethod.latin.common.StringUtils;
23import com.android.inputmethod.latin.define.DecoderSpecificConstants;
24
25import java.util.ArrayList;
26import java.util.Arrays;
27
28import javax.annotation.Nonnull;
29
30/**
31 * Class to represent information of previous words. This class is used to add n-gram entries
32 * into binary dictionaries, to get predictions, and to get suggestions.
33 */
34public class NgramContext {
35    @Nonnull
36    public static final NgramContext EMPTY_PREV_WORDS_INFO =
37            new NgramContext(WordInfo.EMPTY_WORD_INFO);
38    @Nonnull
39    public static final NgramContext BEGINNING_OF_SENTENCE =
40            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
41
42    public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";
43
44    public static final String CONTEXT_SEPARATOR = " ";
45
46    public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
47        return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
48    }
49
50    /**
51     * Word information used to represent previous words information.
52     */
53    public static class WordInfo {
54        @Nonnull
55        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
56        @Nonnull
57        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
58
59        // This is an empty char sequence when mIsBeginningOfSentence is true.
60        public final CharSequence mWord;
61        // TODO: Have sentence separator.
62        // Whether the current context is beginning of sentence or not. This is true when composing
63        // at the beginning of an input field or composing a word after a sentence separator.
64        public final boolean mIsBeginningOfSentence;
65
66        // Beginning of sentence.
67        private WordInfo() {
68            mWord = "";
69            mIsBeginningOfSentence = true;
70        }
71
72        public WordInfo(final CharSequence word) {
73            mWord = word;
74            mIsBeginningOfSentence = false;
75        }
76
77        public boolean isValid() {
78            return mWord != null;
79        }
80
81        @Override
82        public int hashCode() {
83            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
84        }
85
86        @Override
87        public boolean equals(Object o) {
88            if (this == o) return true;
89            if (!(o instanceof WordInfo)) return false;
90            final WordInfo wordInfo = (WordInfo)o;
91            if (mWord == null || wordInfo.mWord == null) {
92                return mWord == wordInfo.mWord
93                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
94            }
95            return TextUtils.equals(mWord, wordInfo.mWord)
96                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
97        }
98    }
99
100    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
101    // have any context for that previous word including the "beginning of sentence context" - we
102    // just don't know what to predict using the information. An example of that is after a comma.
103    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
104    // WordComposer was reset and before starting a new composing word, but we should never be
105    // calling getSuggetions* in this situation.
106    private final WordInfo[] mPrevWordsInfo;
107    private final int mPrevWordsCount;
108
109    private final int mMaxPrevWordCount;
110
111    // Construct from the previous word information.
112    public NgramContext(final WordInfo... prevWordsInfo) {
113        this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
114    }
115
116    public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
117        mPrevWordsInfo = prevWordsInfo;
118        mPrevWordsCount = prevWordsInfo.length;
119        mMaxPrevWordCount = maxPrevWordCount;
120    }
121
122    /**
123     * Create next prevWordsInfo using current prevWordsInfo.
124     */
125    @Nonnull
126    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
127        final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
128        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
129        prevWordsInfo[0] = wordInfo;
130        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
131        return new NgramContext(mMaxPrevWordCount, prevWordsInfo);
132    }
133
134
135    /**
136     * Extracts the previous words context.
137     *
138     * @return a String with the previous words separated by white space.
139     */
140    public String extractPrevWordsContext() {
141        final ArrayList<String> terms = new ArrayList<>();
142        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
143            if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
144                final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
145                if (wordInfo.mIsBeginningOfSentence) {
146                    terms.add(BEGINNING_OF_SENTENCE_TAG);
147                } else {
148                    final String term = wordInfo.mWord.toString();
149                    if (!term.isEmpty()) {
150                        terms.add(term);
151                    }
152                }
153            }
154        }
155        return TextUtils.join(CONTEXT_SEPARATOR, terms);
156    }
157
158    /**
159     * Extracts the previous words context.
160     *
161     * @return a String array with the previous words.
162     */
163    public String[] extractPrevWordsContextArray() {
164        final ArrayList<String> prevTermList = new ArrayList<>();
165        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
166            if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
167                final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
168                if (wordInfo.mIsBeginningOfSentence) {
169                    prevTermList.add(BEGINNING_OF_SENTENCE_TAG);
170                } else {
171                    final String term = wordInfo.mWord.toString();
172                    if (!term.isEmpty()) {
173                        prevTermList.add(term);
174                    }
175                }
176            }
177        }
178        final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]);
179        return contextStringArray;
180    }
181
182    public boolean isValid() {
183        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
184    }
185
186    public boolean isBeginningOfSentenceContext() {
187        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
188    }
189
190    // n is 1-indexed.
191    // TODO: Remove
192    public CharSequence getNthPrevWord(final int n) {
193        if (n <= 0 || n > mPrevWordsCount) {
194            return null;
195        }
196        return mPrevWordsInfo[n - 1].mWord;
197    }
198
199    // n is 1-indexed.
200    @UsedForTesting
201    public boolean isNthPrevWordBeginningOfSentence(final int n) {
202        if (n <= 0 || n > mPrevWordsCount) {
203            return false;
204        }
205        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
206    }
207
208    public void outputToArray(final int[][] codePointArrays,
209            final boolean[] isBeginningOfSentenceArray) {
210        for (int i = 0; i < mPrevWordsCount; i++) {
211            final WordInfo wordInfo = mPrevWordsInfo[i];
212            if (wordInfo == null || !wordInfo.isValid()) {
213                codePointArrays[i] = new int[0];
214                isBeginningOfSentenceArray[i] = false;
215                continue;
216            }
217            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
218            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
219        }
220    }
221
222    public int getPrevWordCount() {
223        return mPrevWordsCount;
224    }
225
226    @Override
227    public int hashCode() {
228        int hashValue = 0;
229        for (final WordInfo wordInfo : mPrevWordsInfo) {
230            if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
231                break;
232            }
233            hashValue ^= wordInfo.hashCode();
234        }
235        return hashValue;
236    }
237
238    @Override
239    public boolean equals(Object o) {
240        if (this == o) return true;
241        if (!(o instanceof NgramContext)) return false;
242        final NgramContext prevWordsInfo = (NgramContext)o;
243
244        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
245        for (int i = 0; i < minLength; i++) {
246            if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
247                return false;
248            }
249        }
250        final WordInfo[] longerWordsInfo;
251        final int longerWordsInfoCount;
252        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
253            longerWordsInfo = mPrevWordsInfo;
254            longerWordsInfoCount = mPrevWordsCount;
255        } else {
256            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
257            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
258        }
259        for (int i = minLength; i < longerWordsInfoCount; i++) {
260            if (longerWordsInfo[i] != null
261                    && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
262                return false;
263            }
264        }
265        return true;
266    }
267
268    @Override
269    public String toString() {
270        final StringBuffer builder = new StringBuffer();
271        for (int i = 0; i < mPrevWordsCount; i++) {
272            final WordInfo wordInfo = mPrevWordsInfo[i];
273            builder.append("PrevWord[");
274            builder.append(i);
275            builder.append("]: ");
276            if (wordInfo == null) {
277                builder.append("null. ");
278                continue;
279            }
280            if (!wordInfo.isValid()) {
281                builder.append("Empty. ");
282                continue;
283            }
284            builder.append(wordInfo.mWord);
285            builder.append(", isBeginningOfSentence: ");
286            builder.append(wordInfo.mIsBeginningOfSentence);
287            builder.append(". ");
288        }
289        return builder.toString();
290    }
291}
292