// NgramContext.java revision 21a28abc6ac3a2404e7b90327122b1364a283ec9
183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi/* 283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 383c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * 483c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 583c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 683c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * You may obtain a copy of the License at 783c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * 883c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 983c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * 1083c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 1183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 1283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1383c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * See the License for the specific language governing permissions and 1483c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi * limitations under the License. 
1583c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi */ 1683c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi 1783c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagipackage com.android.inputmethod.latin; 1883c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi 1986f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport android.text.TextUtils; 20e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 21bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagiimport com.android.inputmethod.annotations.UsedForTesting; 224beeb9253a06482299e0c67467531d30436a02fcJean Chalardimport com.android.inputmethod.latin.common.StringUtils; 230f7d881dc72132dfd75c8b4fe61a69fc5cdcd460Mohammadinamul Sheikimport com.android.inputmethod.latin.define.DecoderSpecificConstants; 24e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 25b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheikimport java.util.ArrayList; 2686f36003fd4397143bd37938dda029e5707634afYohei Yukawaimport java.util.Arrays; 2786f36003fd4397143bd37938dda029e5707634afYohei Yukawa 28d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaokaimport javax.annotation.Nonnull; 29d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka 30a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi/** 31a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * Class to represent information of previous words. This class is used to add n-gram entries 32a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi * into binary dictionaries, to get predictions, and to get suggestions. 33a790c5b68324da41428aeb68594d43ca5632f66dKeisuke Kuroyanagi */ 34bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagipublic class NgramContext { 35d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. 
Takaoka @Nonnull 36bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public static final NgramContext EMPTY_PREV_WORDS_INFO = 37bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi new NgramContext(WordInfo.EMPTY_WORD_INFO); 38d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 39bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public static final NgramContext BEGINNING_OF_SENTENCE = 405f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO); 411adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi 42b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik public static final String BEGINNING_OF_SENTENCE_TAG = "<S>"; 43b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 44b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik public static final String CONTEXT_SEPARATOR = " "; 45b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 46e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi /** 47e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi * Word information used to represent previous words information. 48e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi */ 49e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public static class WordInfo { 50d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 51e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null); 52d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka @Nonnull 535f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo(); 54e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 5586f36003fd4397143bd37938dda029e5707634afYohei Yukawa // This is an empty char sequence when mIsBeginningOfSentence is true. 
5686f36003fd4397143bd37938dda029e5707634afYohei Yukawa public final CharSequence mWord; 57e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // TODO: Have sentence separator. 58e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // Whether the current context is beginning of sentence or not. This is true when composing 59e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // at the beginning of an input field or composing a word after a sentence separator. 60e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public final boolean mIsBeginningOfSentence; 61e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 62e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // Beginning of sentence. 635f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka private WordInfo() { 64e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mWord = ""; 65e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mIsBeginningOfSentence = true; 66e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 67e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 6886f36003fd4397143bd37938dda029e5707634afYohei Yukawa public WordInfo(final CharSequence word) { 69e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mWord = word; 70e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi mIsBeginningOfSentence = false; 71e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 72e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 73e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public boolean isValid() { 74e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi return mWord != null; 75e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 761c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 771c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 781c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public int hashCode() { 791c2f1ada8305e36defa8572da687a4596bf083eaKeisuke 
Kuroyanagi return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } ); 801c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 811c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 821c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 831c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public boolean equals(Object o) { 841c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (this == o) return true; 851c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (!(o instanceof WordInfo)) return false; 861c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi final WordInfo wordInfo = (WordInfo)o; 871c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (mWord == null || wordInfo.mWord == null) { 881c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi return mWord == wordInfo.mWord 891c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; 901c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 9186f36003fd4397143bd37938dda029e5707634afYohei Yukawa return TextUtils.equals(mWord, wordInfo.mWord) 921c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; 931c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 94e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 9583c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi 96e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't 97e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // have any context for that previous word including the "beginning of sentence context" - we 98e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // just don't know what to predict using the information. An example of that is after a comma. 
99e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the 100e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // WordComposer was reset and before starting a new composing word, but we should never be 101e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // calling getSuggetions* in this situation. 102bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi private final WordInfo[] mPrevWordsInfo; 103bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi private final int mPrevWordsCount; 10417f326b7458c2bde2569e283a96e703755485328Keisuke Kuroyanagi 105e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi // Construct from the previous word information. 106bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public NgramContext(final WordInfo... prevWordsInfo) { 107bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi mPrevWordsInfo = prevWordsInfo; 108bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi mPrevWordsCount = prevWordsInfo.length; 109e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 110e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 111a7805e9870430eac3049129d47bebb312d457477Jatin Matani /** 112a7805e9870430eac3049129d47bebb312d457477Jatin Matani * Create next prevWordsInfo using current prevWordsInfo. 113a7805e9870430eac3049129d47bebb312d457477Jatin Matani */ 114d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. 
Takaoka @Nonnull 115bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi public NgramContext getNextNgramContext(final WordInfo wordInfo) { 1160f7d881dc72132dfd75c8b4fe61a69fc5cdcd460Mohammadinamul Sheik final int nextPrevWordCount = Math.min( 1170f7d881dc72132dfd75c8b4fe61a69fc5cdcd460Mohammadinamul Sheik DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, mPrevWordsCount + 1); 1184466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; 119e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi prevWordsInfo[0] = wordInfo; 120bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); 121bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi return new NgramContext(prevWordsInfo); 12283c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi } 1231adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi 124b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 125b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik /** 126b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik * Extracts the previous words context. 127b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik * 128b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik * @return a String with the previous words separated by white space. 
129b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik */ 130b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik public String extractPrevWordsContext() { 131b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik final ArrayList<String> terms = new ArrayList<>(); 132b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) { 133b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) { 134b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i]; 135b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik if (wordInfo.mIsBeginningOfSentence) { 136b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik terms.add(BEGINNING_OF_SENTENCE_TAG); 137b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } else { 138b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik final String term = wordInfo.mWord.toString(); 139b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik if (!term.isEmpty()) { 140b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik terms.add(term); 141b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 142b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 143b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 144b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 14521a28abc6ac3a2404e7b90327122b1364a283ec9Chieu Nguyen return TextUtils.join(CONTEXT_SEPARATOR, terms); 146b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik } 147b00c054125d9f2aa31c2147920cc52cbf2a45cccMohammadinamul Sheik 148a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani /** 149a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani * Extracts the previous words context. 
150a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani * 151a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani * @return a String array with the previous words. 152a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani */ 153a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani public String[] extractPrevWordsContextArray() { 154a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani final ArrayList<String> prevTermList = new ArrayList<>(); 155a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) { 156a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) { 157a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i]; 158a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani if (wordInfo.mIsBeginningOfSentence) { 159a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani prevTermList.add(BEGINNING_OF_SENTENCE_TAG); 160a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } else { 161a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani final String term = wordInfo.mWord.toString(); 162a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani if (!term.isEmpty()) { 163a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani prevTermList.add(term); 164a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 165a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 166a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 167a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 16821a28abc6ac3a2404e7b90327122b1364a283ec9Chieu Nguyen final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]); 169a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani return contextStringArray; 170a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani } 171a568e0acb4a45707e554f63aede917bfa46b9dbaJatin Matani 1721adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi public boolean isValid() { 
173bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid(); 174bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 175bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi 176bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi public boolean isBeginningOfSentenceContext() { 177bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence; 178bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 179bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi 180bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi // n is 1-indexed. 181bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi // TODO: Remove 182bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi public CharSequence getNthPrevWord(final int n) { 183bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi if (n <= 0 || n > mPrevWordsCount) { 184bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return null; 185bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 186bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsInfo[n - 1].mWord; 187bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 188bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi 189bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi // n is 1-indexed. 
190bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi @UsedForTesting 19111a3965f8c376db4d8fbdf3c6ea6ac54550ae6edAdrian Velicu public boolean isNthPrevWordBeginningOfSentence(final int n) { 192bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi if (n <= 0 || n > mPrevWordsCount) { 193bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return false; 194bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 195bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsInfo[n - 1].mIsBeginningOfSentence; 196e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 197e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi 198e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi public void outputToArray(final int[][] codePointArrays, 199e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final boolean[] isBeginningOfSentenceArray) { 200bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi for (int i = 0; i < mPrevWordsCount; i++) { 201e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final WordInfo wordInfo = mPrevWordsInfo[i]; 202e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi if (wordInfo == null || !wordInfo.isValid()) { 203e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi codePointArrays[i] = new int[0]; 204e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi isBeginningOfSentenceArray[i] = false; 205e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi continue; 206e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 207e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord); 208e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence; 209e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 2101adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi } 211dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke 
Kuroyanagi 2124466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi public int getPrevWordCount() { 213bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi return mPrevWordsCount; 2144466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi } 2154466464c24d6c6523f170f56b7e65e43ceb699e2Keisuke Kuroyanagi 216dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi @Override 2171c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public int hashCode() { 218c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi int hashValue = 0; 219c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi for (final WordInfo wordInfo : mPrevWordsInfo) { 220c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) { 221c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi break; 222c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi } 223c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi hashValue ^= wordInfo.hashCode(); 224c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi } 225c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi return hashValue; 2261c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 2271c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 2281c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 2291c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi public boolean equals(Object o) { 2301c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi if (this == o) return true; 231bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi if (!(o instanceof NgramContext)) return false; 232bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi final NgramContext prevWordsInfo = (NgramContext)o; 233845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi 234bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount); 
235845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi for (int i = 0; i < minLength; i++) { 236845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) { 237845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi return false; 238845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 239845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 240bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi final WordInfo[] longerWordsInfo; 241bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi final int longerWordsInfoCount; 242bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) { 243bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfo = mPrevWordsInfo; 244bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfoCount = mPrevWordsCount; 245bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } else { 246bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfo = prevWordsInfo.mPrevWordsInfo; 247bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi longerWordsInfoCount = prevWordsInfo.mPrevWordsCount; 248bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi } 249bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi for (int i = minLength; i < longerWordsInfoCount; i++) { 250845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (longerWordsInfo[i] != null 251845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) { 252845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi return false; 253845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 254845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 255845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi return true; 2561c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi } 
2571c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi 2581c2f1ada8305e36defa8572da687a4596bf083eaKeisuke Kuroyanagi @Override 259dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi public String toString() { 260e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final StringBuffer builder = new StringBuffer(); 261bbd6a26be025bc419e342e32d86629c4ebd68dd8Keisuke Kuroyanagi for (int i = 0; i < mPrevWordsCount; i++) { 262e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi final WordInfo wordInfo = mPrevWordsInfo[i]; 263e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append("PrevWord["); 264e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(i); 265e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append("]: "); 266845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (wordInfo == null) { 267845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi builder.append("null. "); 268845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi continue; 269845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi } 270845c061970f0dcbe75eac028e142aa054f88e2a9Keisuke Kuroyanagi if (!wordInfo.isValid()) { 271e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append("Empty. "); 272e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi continue; 273e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 274e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(wordInfo.mWord); 275e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(", isBeginningOfSentence: "); 276e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(wordInfo.mIsBeginningOfSentence); 277e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi builder.append(". 
"); 278e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi } 279e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi return builder.toString(); 280dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi } 28183c40a2301a0b5a42a75eecada48e7887a7c940eKeisuke Kuroyanagi} 282