/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1683c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi
1783c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagipackage com.android.inputmethod.latin;
1883c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi
1986f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport android.text.TextUtils;
20e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
21bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagiimport com.android.inputmethod.annotations.UsedForTesting;
224beeb9253a06482299e0c67467531d30436a02fcJean Chalardimport com.android.inputmethod.latin.common.StringUtils;
230f7d881dc72132dfd75c8b4fe61a69fc5cdcd460Mohammadinamul Sheikimport com.android.inputmethod.latin.define.DecoderSpecificConstants;
24e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
25b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheikimport java.util.ArrayList;
2686f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport java.util.Arrays;
2786f36003fd4397143bd37938dda029e5707634afYohei Yukawa
28d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaokaimport javax.annotation.Nonnull;
29d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka
30a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi/**
31a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * Class to represent information of previous words. This class is used to add n-gram entries
32a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * into binary dictionaries, to get predictions, and to get suggestions.
33a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi */
34bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagipublic class NgramContext {
35d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka    @Nonnull
36bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi    public static final NgramContext EMPTY_PREV_WORDS_INFO =
37bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi            new NgramContext(WordInfo.EMPTY_WORD_INFO);
38d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka    @Nonnull
39bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi    public static final NgramContext BEGINNING_OF_SENTENCE =
405f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
411adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi
42b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik    public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";
43b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik
44b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik    public static final String CONTEXT_SEPARATOR = " ";
45b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik
46f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik    public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
47f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik        return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
48f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik    }
49f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik
50e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    /**
51e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi     * Word information used to represent previous words information.
52e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi     */
53e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public static class WordInfo {
54d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka        @Nonnull
55e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
56d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka        @Nonnull
575f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
58e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
5986f36003fd4397143bd37938dda029e5707634afYohei Yukawa        // This is an empty char sequence when mIsBeginningOfSentence is true.
6086f36003fd4397143bd37938dda029e5707634afYohei Yukawa        public final CharSequence mWord;
61e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // TODO: Have sentence separator.
62e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // Whether the current context is beginning of sentence or not. This is true when composing
63e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // at the beginning of an input field or composing a word after a sentence separator.
64e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public final boolean mIsBeginningOfSentence;
65e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
66e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        // Beginning of sentence.
675f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka        private WordInfo() {
68e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mWord = "";
69e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mIsBeginningOfSentence = true;
70e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
71e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
7286f36003fd4397143bd37938dda029e5707634afYohei Yukawa        public WordInfo(final CharSequence word) {
73e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mWord = word;
74e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            mIsBeginningOfSentence = false;
75e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
76e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
77e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        public boolean isValid() {
78e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            return mWord != null;
79e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
801c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
811c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        @Override
821c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        public int hashCode() {
831c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
841c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        }
851c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
861c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        @Override
871c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        public boolean equals(Object o) {
881c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (this == o) return true;
891c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (!(o instanceof WordInfo)) return false;
901c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            final WordInfo wordInfo = (WordInfo)o;
911c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            if (mWord == null || wordInfo.mWord == null) {
921c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi                return mWord == wordInfo.mWord
931c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
941c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi            }
9586f36003fd4397143bd37938dda029e5707634afYohei Yukawa            return TextUtils.equals(mWord, wordInfo.mWord)
961c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
971c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        }
98e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
9983c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi
100e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
101e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // have any context for that previous word including the "beginning of sentence context" - we
102e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // just don't know what to predict using the information. An example of that is after a comma.
103e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
104e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // WordComposer was reset and before starting a new composing word, but we should never be
105e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // calling getSuggetions* in this situation.
106bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    private final WordInfo[] mPrevWordsInfo;
107bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    private final int mPrevWordsCount;
10817f326b7458c2bde2569e283a96e703755485328Keisuke Kuroyanagi
109f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik    private final int mMaxPrevWordCount;
110f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik
111e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    // Construct from the previous word information.
112bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi    public NgramContext(final WordInfo... prevWordsInfo) {
113f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik        this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
114f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik    }
115f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik
116f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik    public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
117bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        mPrevWordsInfo = prevWordsInfo;
118bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        mPrevWordsCount = prevWordsInfo.length;
119f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik        mMaxPrevWordCount = maxPrevWordCount;
120e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
121e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
122a7805e9870430eac3049129d47bebb312d457477Jatin Matani    /**
123a7805e9870430eac3049129d47bebb312d457477Jatin Matani     * Create next prevWordsInfo using current prevWordsInfo.
124a7805e9870430eac3049129d47bebb312d457477Jatin Matani     */
125d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka    @Nonnull
126bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
127f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik        final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
1284466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
129e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        prevWordsInfo[0] = wordInfo;
130bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
131ea727b6b71398c9f78fbc7b79d3f0c73d301b938Mohammadinamul Sheik        return new NgramContext(mMaxPrevWordCount, prevWordsInfo);
13283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi    }
1331adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi
134b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik
135b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik    /**
136b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik     * Extracts the previous words context.
137b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik     *
138b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik     * @return a String with the previous words separated by white space.
139b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik     */
140b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik    public String extractPrevWordsContext() {
141b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik        final ArrayList<String> terms = new ArrayList<>();
142b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
143b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik            if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
144b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
145b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                if (wordInfo.mIsBeginningOfSentence) {
146b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                    terms.add(BEGINNING_OF_SENTENCE_TAG);
147b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                } else {
148b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                    final String term = wordInfo.mWord.toString();
149b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                    if (!term.isEmpty()) {
150b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                        terms.add(term);
151b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                    }
152b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik                }
153b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik            }
154b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik        }
15521a28abc6ac3a2404e7b90327122b1364a283ec9Chieu Nguyen        return TextUtils.join(CONTEXT_SEPARATOR, terms);
156b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik    }
157b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik
158a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani    /**
159a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani     * Extracts the previous words context.
160a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani     *
161a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani     * @return a String array with the previous words.
162a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani     */
163a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani    public String[] extractPrevWordsContextArray() {
164a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani        final ArrayList<String> prevTermList = new ArrayList<>();
165a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
166a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani            if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
167a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
168a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                if (wordInfo.mIsBeginningOfSentence) {
169a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                    prevTermList.add(BEGINNING_OF_SENTENCE_TAG);
170a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                } else {
171a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                    final String term = wordInfo.mWord.toString();
172a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                    if (!term.isEmpty()) {
173a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                        prevTermList.add(term);
174a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                    }
175a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani                }
176a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani            }
177a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani        }
17821a28abc6ac3a2404e7b90327122b1364a283ec9Chieu Nguyen        final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]);
179a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani        return contextStringArray;
180a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani    }
181a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani
1821adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi    public boolean isValid() {
183bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
184bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    }
185bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi
186bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    public boolean isBeginningOfSentenceContext() {
187bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
188bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    }
189bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi
190bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    // n is 1-indexed.
191bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    // TODO: Remove
192bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    public CharSequence getNthPrevWord(final int n) {
193bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        if (n <= 0 || n > mPrevWordsCount) {
194bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi            return null;
195bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        }
196bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        return mPrevWordsInfo[n - 1].mWord;
197bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    }
198bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi
199bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    // n is 1-indexed.
200bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi    @UsedForTesting
20111a3965f8c376db4d8fbdf3c6ea6ac54550ae6edAdrian Velicu    public boolean isNthPrevWordBeginningOfSentence(final int n) {
202bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        if (n <= 0 || n > mPrevWordsCount) {
203bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi            return false;
204bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        }
205bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
206e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    }
207e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi
208e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi    public void outputToArray(final int[][] codePointArrays,
209e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            final boolean[] isBeginningOfSentenceArray) {
210bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        for (int i = 0; i < mPrevWordsCount; i++) {
211e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            final WordInfo wordInfo = mPrevWordsInfo[i];
212e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            if (wordInfo == null || !wordInfo.isValid()) {
213e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                codePointArrays[i] = new int[0];
214e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                isBeginningOfSentenceArray[i] = false;
215e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                continue;
216e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            }
217e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
218e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
219e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
2201adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi    }
221dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi
2224466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi    public int getPrevWordCount() {
223bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        return mPrevWordsCount;
2244466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi    }
2254466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi
226dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    @Override
2271c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    public int hashCode() {
228c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi        int hashValue = 0;
229c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi        for (final WordInfo wordInfo : mPrevWordsInfo) {
230c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
231c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi                break;
232c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            }
233c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            hashValue ^= wordInfo.hashCode();
234c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi        }
235c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi        return hashValue;
2361c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    }
2371c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
2381c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    @Override
2391c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    public boolean equals(Object o) {
2401c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi        if (this == o) return true;
241bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        if (!(o instanceof NgramContext)) return false;
242bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        final NgramContext prevWordsInfo = (NgramContext)o;
243845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi
244bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
245845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi        for (int i = 0; i < minLength; i++) {
246845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
247845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi                return false;
248845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            }
249845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi        }
250bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        final WordInfo[] longerWordsInfo;
251bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        final int longerWordsInfoCount;
252bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
253bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi            longerWordsInfo = mPrevWordsInfo;
254bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi            longerWordsInfoCount = mPrevWordsCount;
255bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        } else {
256bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
257bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
258bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        }
259bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        for (int i = minLength; i < longerWordsInfoCount; i++) {
260845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            if (longerWordsInfo[i] != null
261845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi                    && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
262845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi                return false;
263845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            }
264845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi        }
265845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi        return true;
2661c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    }
2671c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi
2681c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi    @Override
269dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    public String toString() {
270e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        final StringBuffer builder = new StringBuffer();
271bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi        for (int i = 0; i < mPrevWordsCount; i++) {
272e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            final WordInfo wordInfo = mPrevWordsInfo[i];
273e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append("PrevWord[");
274e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(i);
275e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append("]: ");
276845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            if (wordInfo == null) {
277845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi                builder.append("null. ");
278845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi                continue;
279845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            }
280845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi            if (!wordInfo.isValid()) {
281e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                builder.append("Empty. ");
282e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                continue;
283e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            }
284e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(wordInfo.mWord);
285e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(", isBeginningOfSentence: ");
286e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(wordInfo.mIsBeginningOfSentence);
287e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi            builder.append(". ");
288e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        }
289e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi        return builder.toString();
290dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi    }
29183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi}
292