/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DecoderSpecificConstants;

import java.util.ArrayList;
import java.util.Arrays;

import javax.annotation.Nonnull;

/**
 * Class to represent information of previous words. This class is used to add n-gram entries
 * into binary dictionaries, to get predictions, and to get suggestions.
 *
 * <p>Index 0 of the context is the word immediately before the considered word, index 1 is the
 * word before that, and so on. Trailing empty entries carry no information, so two contexts that
 * differ only in trailing {@link WordInfo#EMPTY_WORD_INFO} (or {@code null}) entries are equal.
 */
public class NgramContext {
    @Nonnull
    public static final NgramContext EMPTY_PREV_WORDS_INFO =
            new NgramContext(WordInfo.EMPTY_WORD_INFO);
    @Nonnull
    public static final NgramContext BEGINNING_OF_SENTENCE =
            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);

    /** Term emitted by the extract* methods for a beginning-of-sentence entry. */
    public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";

    /** Separator placed between terms by {@link #extractPrevWordsContext()}. */
    public static final String CONTEXT_SEPARATOR = " ";

    /**
     * Word information used to represent previous words information.
     */
    public static class WordInfo {
        @Nonnull
        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
        @Nonnull
        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();

        // This is an empty char sequence when mIsBeginningOfSentence is true.
        public final CharSequence mWord;
        // TODO: Have sentence separator.
        // Whether the current context is beginning of sentence or not. This is true when composing
        // at the beginning of an input field or composing a word after a sentence separator.
        public final boolean mIsBeginningOfSentence;

        // Beginning of sentence.
        private WordInfo() {
            mWord = "";
            mIsBeginningOfSentence = true;
        }

        /**
         * Creates a regular (non beginning-of-sentence) word entry.
         *
         * @param word the word text; {@code null} produces an invalid (empty) entry.
         */
        public WordInfo(final CharSequence word) {
            mWord = word;
            mIsBeginningOfSentence = false;
        }

        /**
         * @return {@code true} when this entry holds a word, i.e. {@link #mWord} is not null.
         */
        public boolean isValid() {
            return mWord != null;
        }

        @Override
        public int hashCode() {
            // equals() compares mWord by text content, so the hash must be derived from the text
            // as well; hashing the CharSequence object itself would give different hashes to a
            // String and, say, a StringBuilder holding the same characters.
            final String wordText = (mWord == null) ? null : mWord.toString();
            return Arrays.hashCode(new Object[] { wordText, mIsBeginningOfSentence });
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (!(o instanceof WordInfo)) return false;
            final WordInfo wordInfo = (WordInfo)o;
            // TextUtils.equals handles null on either side: two nulls are equal, null never
            // equals a non-null sequence.
            return mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence
                    && TextUtils.equals(mWord, wordInfo.mWord);
        }
    }

    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
    // have any context for that previous word including the "beginning of sentence context" - we
    // just don't know what to predict using the information. An example of that is after a comma.
    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
    // WordComposer was reset and before starting a new composing word, but we should never be
    // calling getSuggestions* in this situation.
    private final WordInfo[] mPrevWordsInfo;
    private final int mPrevWordsCount;

    /**
     * Constructs a context from the previous word information.
     *
     * @param prevWordsInfo previous words, nearest word first. The array is copied so that later
     *         changes made by the caller cannot alter this context.
     */
    public NgramContext(final WordInfo... prevWordsInfo) {
        mPrevWordsInfo = prevWordsInfo.clone();
        mPrevWordsCount = mPrevWordsInfo.length;
    }

    /**
     * Create next prevWordsInfo using current prevWordsInfo.
     *
     * @param wordInfo the word that becomes the nearest previous word of the new context.
     * @return a new context with {@code wordInfo} at index 0 followed by the entries of this
     *         context, truncated to
     *         {@link DecoderSpecificConstants#MAX_PREV_WORD_COUNT_FOR_N_GRAM} entries.
     */
    @Nonnull
    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
        final int nextPrevWordCount = Math.min(
                DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, mPrevWordsCount + 1);
        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
        prevWordsInfo[0] = wordInfo;
        // Shift the existing entries back by one; the oldest entry falls off when at capacity.
        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
        return new NgramContext(prevWordsInfo);
    }

    /**
     * Extracts the previous words context.
     *
     * @return a String with the previous words separated by white space, oldest word first.
     *         When there is no usable word this is {@link #BEGINNING_OF_SENTENCE_TAG}.
     */
    public String extractPrevWordsContext() {
        // The array form already falls back to a single BEGINNING_OF_SENTENCE_TAG element when
        // there are no terms, so joining it yields the same result for every input.
        return TextUtils.join(CONTEXT_SEPARATOR, extractPrevWordsContextArray());
    }

    /**
     * Extracts the previous words context.
     *
     * @return a String array with the previous words, oldest word first. Beginning-of-sentence
     *         entries are represented by {@link #BEGINNING_OF_SENTENCE_TAG}; null, invalid and
     *         empty-text entries are skipped. When nothing remains, the array contains the
     *         single element {@link #BEGINNING_OF_SENTENCE_TAG}.
     */
    public String[] extractPrevWordsContextArray() {
        final ArrayList<String> prevTermList = new ArrayList<>();
        // Entries are stored nearest-first, so walk backwards to emit them in reading order.
        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                continue;
            }
            if (wordInfo.mIsBeginningOfSentence) {
                prevTermList.add(BEGINNING_OF_SENTENCE_TAG);
                continue;
            }
            final String term = wordInfo.mWord.toString();
            if (!term.isEmpty()) {
                prevTermList.add(term);
            }
        }
        if (prevTermList.isEmpty()) {
            return new String[] { BEGINNING_OF_SENTENCE_TAG };
        }
        return prevTermList.toArray(new String[prevTermList.size()]);
    }

    /**
     * @return {@code true} when the nearest previous word entry exists and is valid.
     */
    public boolean isValid() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0] != null && mPrevWordsInfo[0].isValid();
    }

    /**
     * @return {@code true} when the nearest previous entry is a beginning-of-sentence marker.
     */
    public boolean isBeginningOfSentenceContext() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0] != null
                && mPrevWordsInfo[0].mIsBeginningOfSentence;
    }

    // n is 1-indexed.
    // TODO: Remove
    /**
     * @param n 1-based position of the previous word; 1 is the nearest one.
     * @return the word at that position, or {@code null} when {@code n} is out of range or the
     *         entry holds no word.
     */
    public CharSequence getNthPrevWord(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return null;
        }
        final WordInfo wordInfo = mPrevWordsInfo[n - 1];
        return (wordInfo == null) ? null : wordInfo.mWord;
    }

    // n is 1-indexed.
    /**
     * @param n 1-based position of the previous word; 1 is the nearest one.
     * @return whether the entry at that position is a beginning-of-sentence marker;
     *         {@code false} when {@code n} is out of range or the entry is null.
     */
    @UsedForTesting
    public boolean isNthPrevWordBeginningOfSentence(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return false;
        }
        final WordInfo wordInfo = mPrevWordsInfo[n - 1];
        return wordInfo != null && wordInfo.mIsBeginningOfSentence;
    }

    /**
     * Writes this context into the given parallel arrays, one slot per previous word.
     *
     * @param codePointArrays receives the code points of each word; null or invalid entries are
     *         written as an empty array. Must have at least {@link #getPrevWordCount()} slots.
     * @param isBeginningOfSentenceArray receives the beginning-of-sentence flag of each word;
     *         null or invalid entries are written as {@code false}. Must have at least
     *         {@link #getPrevWordCount()} slots.
     */
    public void outputToArray(final int[][] codePointArrays,
            final boolean[] isBeginningOfSentenceArray) {
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                codePointArrays[i] = new int[0];
                isBeginningOfSentenceArray[i] = false;
                continue;
            }
            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
        }
    }

    /**
     * @return the number of previous word entries held, including empty ones.
     */
    public int getPrevWordCount() {
        return mPrevWordsCount;
    }

    // Returns true for entries that carry no information: null or equal to EMPTY_WORD_INFO.
    private static boolean isEmptyEntry(final WordInfo wordInfo) {
        return wordInfo == null || WordInfo.EMPTY_WORD_INFO.equals(wordInfo);
    }

    // Maps null to EMPTY_WORD_INFO so both forms of "no information" compare and hash alike.
    private static WordInfo normalize(final WordInfo wordInfo) {
        return (wordInfo == null) ? WordInfo.EMPTY_WORD_INFO : wordInfo;
    }

    // Number of leading entries left once trailing empty entries are dropped.
    private int getSignificantWordCount() {
        int count = mPrevWordsCount;
        while (count > 0 && isEmptyEntry(mPrevWordsInfo[count - 1])) {
            count--;
        }
        return count;
    }

    @Override
    public int hashCode() {
        // Hash exactly the entries equals() compares: trailing empty entries are ignored so that
        // contexts which are equal always produce the same hash.
        final int significantCount = getSignificantWordCount();
        int hashValue = 1;
        for (int i = 0; i < significantCount; i++) {
            hashValue = 31 * hashValue + normalize(mPrevWordsInfo[i]).hashCode();
        }
        return hashValue;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof NgramContext)) return false;
        final NgramContext prevWordsInfo = (NgramContext)o;

        // Two contexts are equal when they match entry by entry after trailing empty entries
        // are dropped; i.e. the longer one may only have empty entries past the shorter one.
        final int significantCount = getSignificantWordCount();
        if (significantCount != prevWordsInfo.getSignificantWordCount()) {
            return false;
        }
        for (int i = 0; i < significantCount; i++) {
            final WordInfo mine = normalize(mPrevWordsInfo[i]);
            final WordInfo theirs = normalize(prevWordsInfo.mPrevWordsInfo[i]);
            if (!mine.equals(theirs)) {
                return false;
            }
        }
        return true;
    }

    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            builder.append("PrevWord[");
            builder.append(i);
            builder.append("]: ");
            if (wordInfo == null) {
                builder.append("null. ");
                continue;
            }
            if (!wordInfo.isValid()) {
                builder.append("Empty. ");
                continue;
            }
            builder.append(wordInfo.mWord);
            builder.append(", isBeginningOfSentence: ");
            builder.append(wordInfo.mIsBeginningOfSentence);
            builder.append(". ");
        }
        return builder.toString();
    }
}