1bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi/* 2bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 3bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * 4bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 6bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * You may obtain a copy of the License at 7bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * 8bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * 10bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * See the License for the specific language governing permissions and 14bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi * limitations under the License. 15bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi */ 16bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi 17bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagipackage com.android.inputmethod.latin.utils; 18bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi 19bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagiimport java.util.regex.Pattern; 20bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi 21bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagiimport com.android.inputmethod.latin.Constants; 22bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagiimport com.android.inputmethod.latin.PrevWordsInfo; 23bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagiimport com.android.inputmethod.latin.PrevWordsInfo.WordInfo; 24bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagiimport com.android.inputmethod.latin.settings.SpacingAndPunctuations; 25bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi 26bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagipublic final class PrevWordsInfoUtils { 27bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi private PrevWordsInfoUtils() { 28bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // Intentional empty constructor for utility class. 29bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 30bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi 31bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi private static final Pattern SPACE_REGEX = Pattern.compile("\\s+"); 32bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // Get context information from nth word before the cursor. n = 1 retrieves the words 33bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits 34bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // on whitespace only. 35bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // Also, it won't return words that end in a separator (if the nth word before the cursor 36bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // ends in a separator, it returns information representing beginning-of-sentence). 37bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2): 38bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 1) "abc def|" -> abc, def 39bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 1) "abc def |" -> abc, def 40bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 1) "abc 'def|" -> empty, 'def 41bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 1) "abc def. |" -> beginning-of-sentence 42bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 1) "abc def . |" -> beginning-of-sentence 43bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc def|" -> beginning-of-sentence, abc 44bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc def |" -> beginning-of-sentence, abc 45bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot 46bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // represent this situation using PrevWordsInfo. See TODO in the method. 47bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // TODO: The next example's result should be "abc, def". This have to be fixed before we 48bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // retrieve the prior context of Beginning-of-Sentence. 49bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc def. |" -> beginning-of-sentence, abc 50bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc def . |" -> abc, def 51bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc|" -> beginning-of-sentence 52bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc |" -> beginning-of-sentence 53bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // (n = 2) "abc. def|" -> beginning-of-sentence 54bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev, 55bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final SpacingAndPunctuations spacingAndPunctuations, final int n) { 56bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; 57bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final String[] w = SPACE_REGEX.split(prev); 58bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 59bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi for (int i = 0; i < prevWordsInfo.length; i++) { 60bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final int focusedWordIndex = w.length - n - i; 61bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // Referring to the word after the focused word. 62bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) { 63bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1]; 64bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (!wordFollowingTheNthPrevWord.isEmpty()) { 65bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final char firstChar = wordFollowingTheNthPrevWord.charAt(0); 66bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (spacingAndPunctuations.isWordConnector(firstChar)) { 67bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // The word following the focused word is starting with a word connector. 68bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // TODO: Return meaningful context for this case. 69bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; 70bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi break; 71bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 72bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 73bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 74bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // If we can't find (n + i) words, the context is beginning-of-sentence. 75bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (focusedWordIndex < 0) { 76bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; 77bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi break; 78bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 79bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final String focusedWord = w[focusedWordIndex]; 80bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // If the word is, the context is beginning-of-sentence. 81bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final int length = focusedWord.length(); 82bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (length <= 0) { 83bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; 84bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi break; 85bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 86bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // If ends in a sentence separator, the context is beginning-of-sentence. 87bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi final char lastChar = focusedWord.charAt(length - 1); 88bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (spacingAndPunctuations.isSentenceSeparator(lastChar)) { 89bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; 90bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi break; 91bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 92bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // If ends in a word separator or connector, the context is unclear. 93bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi // TODO: Return meaningful context for this case. 94bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi if (spacingAndPunctuations.isWordSeparator(lastChar) 95bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi || spacingAndPunctuations.isWordConnector(lastChar)) { 96bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; 97bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi break; 98bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 99bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi prevWordsInfo[i] = new WordInfo(focusedWord); 100bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 101bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi return new PrevWordsInfo(prevWordsInfo); 102bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi } 103bb843eb223ce0f8fb1088ed3393a4165123ddb1fKeisuke Kuroyanagi} 104