183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi/* 283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 383c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * 483c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 583c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 683c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * You may obtain a copy of the License at 783c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * 883c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 983c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * 1083c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 1183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 1283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1383c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * See the License for the specific language governing permissions and 1483c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * limitations under the License. 1583c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi */ 1683c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi 1783c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagipackage com.android.inputmethod.latin; 1883c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi 1986f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport android.text.TextUtils; 20e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 21bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagiimport com.android.inputmethod.annotations.UsedForTesting; 224beeb9253a06482299e0c67467531d30436a02fcJean Chalardimport com.android.inputmethod.latin.common.StringUtils; 230f7d881dc72132dfd75c8b4fe61a69fc5cdcd460Mohammadinamul Sheikimport com.android.inputmethod.latin.define.DecoderSpecificConstants; 24e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 25b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheikimport java.util.ArrayList; 2686f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport java.util.Arrays; 2786f36003fd4397143bd37938dda029e5707634afYohei Yukawa 28d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaokaimport javax.annotation.Nonnull; 29d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka 30a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi/** 31a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * Class to represent information of previous words. This class is used to add n-gram entries 32a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * into binary dictionaries, to get predictions, and to get suggestions. 33a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi */ 34bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagipublic class NgramContext { 35d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 36bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public static final NgramContext EMPTY_PREV_WORDS_INFO = 37bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi new NgramContext(WordInfo.EMPTY_WORD_INFO); 38d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 39bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public static final NgramContext BEGINNING_OF_SENTENCE = 405f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO); 411adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi 42b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik public static final String BEGINNING_OF_SENTENCE_TAG = "<S>"; 43b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 44b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik public static final String CONTEXT_SEPARATOR = " "; 45b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 46f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) { 47f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO); 48f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik } 49f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik 50e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi /** 51e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi * Word information used to represent previous words information. 52e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi */ 53e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public static class WordInfo { 54d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 55e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null); 56d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 575f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo(); 58e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 5986f36003fd4397143bd37938dda029e5707634afYohei Yukawa // This is an empty char sequence when mIsBeginningOfSentence is true. 6086f36003fd4397143bd37938dda029e5707634afYohei Yukawa public final CharSequence mWord; 61e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // TODO: Have sentence separator. 62e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // Whether the current context is beginning of sentence or not. This is true when composing 63e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // at the beginning of an input field or composing a word after a sentence separator. 64e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public final boolean mIsBeginningOfSentence; 65e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 66e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // Beginning of sentence. 675f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka private WordInfo() { 68e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mWord = ""; 69e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mIsBeginningOfSentence = true; 70e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 71e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 7286f36003fd4397143bd37938dda029e5707634afYohei Yukawa public WordInfo(final CharSequence word) { 73e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mWord = word; 74e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mIsBeginningOfSentence = false; 75e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 76e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 77e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public boolean isValid() { 78e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi return mWord != null; 79e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 801c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 811c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 821c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public int hashCode() { 831c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } ); 841c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 851c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 861c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 871c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public boolean equals(Object o) { 881c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (this == o) return true; 891c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (!(o instanceof WordInfo)) return false; 901c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi final WordInfo wordInfo = (WordInfo)o; 911c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (mWord == null || wordInfo.mWord == null) { 921c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi return mWord == wordInfo.mWord 931c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; 941c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 9586f36003fd4397143bd37938dda029e5707634afYohei Yukawa return TextUtils.equals(mWord, wordInfo.mWord) 961c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; 971c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 98e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 9983c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi 100e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't 101e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // have any context for that previous word including the "beginning of sentence context" - we 102e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // just don't know what to predict using the information. An example of that is after a comma. 103e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the 104e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // WordComposer was reset and before starting a new composing word, but we should never be 105e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // calling getSuggetions* in this situation. 106bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi private final WordInfo[] mPrevWordsInfo; 107bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi private final int mPrevWordsCount; 10817f326b7458c2bde2569e283a96e703755485328Keisuke Kuroyanagi 109f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik private final int mMaxPrevWordCount; 110f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik 111e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // Construct from the previous word information. 112bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public NgramContext(final WordInfo... prevWordsInfo) { 113f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo); 114f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik } 115f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik 116f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) { 117bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi mPrevWordsInfo = prevWordsInfo; 118bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi mPrevWordsCount = prevWordsInfo.length; 119f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik mMaxPrevWordCount = maxPrevWordCount; 120e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 121e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 122a7805e9870430eac3049129d47bebb312d457477Jatin Matani /** 123a7805e9870430eac3049129d47bebb312d457477Jatin Matani * Create next prevWordsInfo using current prevWordsInfo. 124a7805e9870430eac3049129d47bebb312d457477Jatin Matani */ 125d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 126bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public NgramContext getNextNgramContext(final WordInfo wordInfo) { 127f4686092232588781910cc4e64406c4958577e86Mohammadinamul Sheik final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1); 1284466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; 129e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi prevWordsInfo[0] = wordInfo; 130bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); 131ea727b6b71398c9f78fbc7b79d3f0c73d301b938Mohammadinamul Sheik return new NgramContext(mMaxPrevWordCount, prevWordsInfo); 13283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi } 1331adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi 134b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 135b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik /** 136b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik * Extracts the previous words context. 137b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik * 138b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik * @return a String with the previous words separated by white space. 139b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik */ 140b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik public String extractPrevWordsContext() { 141b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik final ArrayList<String> terms = new ArrayList<>(); 142b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) { 143b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) { 144b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i]; 145b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik if (wordInfo.mIsBeginningOfSentence) { 146b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik terms.add(BEGINNING_OF_SENTENCE_TAG); 147b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } else { 148b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik final String term = wordInfo.mWord.toString(); 149b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik if (!term.isEmpty()) { 150b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik terms.add(term); 151b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 152b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 153b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 154b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 15521a28abc6ac3a2404e7b90327122b1364a283ec9Chieu Nguyen return TextUtils.join(CONTEXT_SEPARATOR, terms); 156b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 157b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 158a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani /** 159a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani * Extracts the previous words context. 160a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani * 161a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani * @return a String array with the previous words. 162a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani */ 163a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani public String[] extractPrevWordsContextArray() { 164a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani final ArrayList<String> prevTermList = new ArrayList<>(); 165a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) { 166a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) { 167a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i]; 168a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani if (wordInfo.mIsBeginningOfSentence) { 169a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani prevTermList.add(BEGINNING_OF_SENTENCE_TAG); 170a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } else { 171a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani final String term = wordInfo.mWord.toString(); 172a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani if (!term.isEmpty()) { 173a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani prevTermList.add(term); 174a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 175a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 176a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 177a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 17821a28abc6ac3a2404e7b90327122b1364a283ec9Chieu Nguyen final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]); 179a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani return contextStringArray; 180a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 181a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani 1821adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi public boolean isValid() { 183bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid(); 184bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 185bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi 186bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi public boolean isBeginningOfSentenceContext() { 187bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence; 188bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 189bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi 190bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi // n is 1-indexed. 191bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi // TODO: Remove 192bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi public CharSequence getNthPrevWord(final int n) { 193bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi if (n <= 0 || n > mPrevWordsCount) { 194bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return null; 195bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 196bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsInfo[n - 1].mWord; 197bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 198bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi 199bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi // n is 1-indexed. 200bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi @UsedForTesting 20111a3965f8c376db4d8fbdf3c6ea6ac54550ae6edAdrian Velicu public boolean isNthPrevWordBeginningOfSentence(final int n) { 202bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi if (n <= 0 || n > mPrevWordsCount) { 203bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return false; 204bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 205bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsInfo[n - 1].mIsBeginningOfSentence; 206e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 207e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 208e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public void outputToArray(final int[][] codePointArrays, 209e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final boolean[] isBeginningOfSentenceArray) { 210bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi for (int i = 0; i < mPrevWordsCount; i++) { 211e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final WordInfo wordInfo = mPrevWordsInfo[i]; 212e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi if (wordInfo == null || !wordInfo.isValid()) { 213e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi codePointArrays[i] = new int[0]; 214e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi isBeginningOfSentenceArray[i] = false; 215e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi continue; 216e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 217e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord); 218e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence; 219e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 2201adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi } 221dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi 2224466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi public int getPrevWordCount() { 223bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsCount; 2244466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi } 2254466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi 226dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi @Override 2271c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public int hashCode() { 228c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi int hashValue = 0; 229c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi for (final WordInfo wordInfo : mPrevWordsInfo) { 230c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) { 231c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi break; 232c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi } 233c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi hashValue ^= wordInfo.hashCode(); 234c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi } 235c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi return hashValue; 2361c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 2371c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 2381c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 2391c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public boolean equals(Object o) { 2401c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (this == o) return true; 241bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi if (!(o instanceof NgramContext)) return false; 242bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi final NgramContext prevWordsInfo = (NgramContext)o; 243845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi 244bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount); 245845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi for (int i = 0; i < minLength; i++) { 246845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) { 247845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi return false; 248845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 249845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 250bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi final WordInfo[] longerWordsInfo; 251bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi final int longerWordsInfoCount; 252bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) { 253bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfo = mPrevWordsInfo; 254bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfoCount = mPrevWordsCount; 255bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } else { 256bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfo = prevWordsInfo.mPrevWordsInfo; 257bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfoCount = prevWordsInfo.mPrevWordsCount; 258bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 259bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi for (int i = minLength; i < longerWordsInfoCount; i++) { 260845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (longerWordsInfo[i] != null 261845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) { 262845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi return false; 263845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 264845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 265845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi return true; 2661c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 2671c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 2681c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 269dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi public String toString() { 270e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final StringBuffer builder = new StringBuffer(); 271bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi for (int i = 0; i < mPrevWordsCount; i++) { 272e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final WordInfo wordInfo = mPrevWordsInfo[i]; 273e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append("PrevWord["); 274e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(i); 275e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append("]: "); 276845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (wordInfo == null) { 277845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi builder.append("null. "); 278845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi continue; 279845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 280845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (!wordInfo.isValid()) { 281e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append("Empty. "); 282e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi continue; 283e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 284e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(wordInfo.mWord); 285e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(", isBeginningOfSentence: "); 286e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(wordInfo.mIsBeginningOfSentence); 287e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(". "); 288e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 289e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi return builder.toString(); 290dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi } 29183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi} 292