WordProperty.java revision b5ef884fbb6bfd08ce793604cdf7f0941e958a84
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
import java.util.Arrays;

import javax.annotation.Nullable;

/**
 * Utility class for a word with a probability.
 *
 * This is chiefly used to iterate a dictionary.
 */
public final class WordProperty implements Comparable<WordProperty> {
    public final String mWord;
    public final ProbabilityInfo mProbabilityInfo;
    // May be null when built through the testing constructor with null shortcut targets.
    public final ArrayList<WeightedString> mShortcutTargets;
    // Null when the testing constructor received null bigrams, or when the native-data
    // constructor found no n-gram entries.
    public final ArrayList<NgramProperty> mNgrams;
    // TODO: Support mIsBeginningOfSentence.
    public final boolean mIsBeginningOfSentence;
    public final boolean mIsNotAWord;
    public final boolean mIsPossiblyOffensive;
    public final boolean mHasShortcuts;
    public final boolean mHasNgrams;

    // Lazily computed hash code; 0 means "not computed yet" (see hashCode()).
    private int mHashCode = 0;

    /**
     * Builds a word property from already-decoded shortcut and bigram lists.
     *
     * Each bigram target is wrapped in an {@link NgramProperty} whose context is this word.
     * {@code mIsBeginningOfSentence} is always false for instances built this way.
     *
     * @param word the word.
     * @param probabilityInfo the probability information of the word.
     * @param shortcutTargets the shortcut targets; stored as is. A null or empty list results
     *        in {@code mHasShortcuts} being false.
     * @param bigrams the bigram targets, or null. Null leaves {@code mNgrams} null; a null or
     *        empty list results in {@code mHasNgrams} being false.
     * @param isNotAWord stored in {@code mIsNotAWord}.
     * @param isPossiblyOffensive stored in {@code mIsPossiblyOffensive}.
     */
    // TODO: Support n-gram.
    @UsedForTesting
    public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
            final ArrayList<WeightedString> shortcutTargets,
            @Nullable final ArrayList<WeightedString> bigrams,
            final boolean isNotAWord, final boolean isPossiblyOffensive) {
        mWord = word;
        mProbabilityInfo = probabilityInfo;
        mShortcutTargets = shortcutTargets;
        if (null == bigrams) {
            mNgrams = null;
        } else {
            mNgrams = new ArrayList<>();
            final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
            for (final WeightedString bigramTarget : bigrams) {
                mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
            }
        }
        mIsBeginningOfSentence = false;
        mIsNotAWord = isNotAWord;
        mIsPossiblyOffensive = isPossiblyOffensive;
        mHasNgrams = bigrams != null && !bigrams.isEmpty();
        mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
    }

    /**
     * Converts a raw int array into a {@link ProbabilityInfo}, reading the probability,
     * timestamp, level and count at the BinaryDictionary.FORMAT_WORD_PROPERTY_*_INDEX positions.
     */
    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
        return new ProbabilityInfo(
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
    }

    /**
     * Construct word property using information from native code.
     *
     * This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
     *
     * The four ngram* lists are read in parallel: entry {@code i} of each describes the same
     * n-gram. Likewise {@code shortcutTargets} and {@code shortcutProbabilities} are read in
     * parallel. {@code mNgrams} is set to null when no n-gram entry was supplied.
     *
     * @param codePoints code points of the word, decoded with
     *        StringUtils.getStringFromNullTerminatedCodePointArray.
     * @param isNotAWord stored in {@code mIsNotAWord}.
     * @param isPossiblyOffensive stored in {@code mIsPossiblyOffensive}.
     * @param hasBigram stored in {@code mHasNgrams}.
     * @param hasShortcuts stored in {@code mHasShortcuts}.
     * @param isBeginningOfSentence stored in {@code mIsBeginningOfSentence}.
     * @param probabilityInfo raw probability information of the word.
     * @param ngramPrevWordsArray for each n-gram, the code points of each previous word.
     * @param ngramPrevWordIsBeginningOfSentenceArray for each n-gram, whether each previous word
     *        is the beginning-of-sentence marker rather than an actual word.
     * @param ngramTargets for each n-gram, the code points of the target word.
     * @param ngramProbabilityInfo for each n-gram, the raw probability information.
     * @param shortcutTargets code points of each shortcut target.
     * @param shortcutProbabilities probability of each shortcut target.
     */
    public WordProperty(final int[] codePoints, final boolean isNotAWord,
            final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
            final boolean isBeginningOfSentence, final int[] probabilityInfo,
            final ArrayList<int[][]> ngramPrevWordsArray,
            final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
            final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo,
            final ArrayList<int[]> shortcutTargets,
            final ArrayList<Integer> shortcutProbabilities) {
        mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
        mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
        mShortcutTargets = new ArrayList<>();
        final ArrayList<NgramProperty> ngrams = new ArrayList<>();
        mIsBeginningOfSentence = isBeginningOfSentence;
        mIsNotAWord = isNotAWord;
        mIsPossiblyOffensive = isPossiblyOffensive;
        mHasShortcuts = hasShortcuts;
        mHasNgrams = hasBigram;

        final int relatedNgramCount = ngramTargets.size();
        for (int i = 0; i < relatedNgramCount; i++) {
            final String ngramTargetString =
                    StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
            final WeightedString ngramTarget = new WeightedString(ngramTargetString,
                    createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
            final int[][] prevWords = ngramPrevWordsArray.get(i);
            final boolean[] isBeginningOfSentenceArray =
                    ngramPrevWordIsBeginningOfSentenceArray.get(i);
            final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
            for (int j = 0; j < prevWords.length; j++) {
                wordInfoArray[j] = isBeginningOfSentenceArray[j]
                        ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
                        : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
                                prevWords[j]));
            }
            final NgramContext ngramContext = new NgramContext(wordInfoArray);
            ngrams.add(new NgramProperty(ngramTarget, ngramContext));
        }
        mNgrams = ngrams.isEmpty() ? null : ngrams;

        final int shortcutTargetCount = shortcutTargets.size();
        for (int i = 0; i < shortcutTargetCount; i++) {
            final String shortcutTargetString =
                    StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
            mShortcutTargets.add(
                    new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
        }
    }

    /**
     * Returns the target words of the n-grams whose context has exactly one previous word.
     *
     * @return a new list of bigram targets, or null when {@code mNgrams} is null.
     */
    // TODO: Remove
    @UsedForTesting
    public ArrayList<WeightedString> getBigrams() {
        if (null == mNgrams) {
            return null;
        }
        final ArrayList<WeightedString> bigrams = new ArrayList<>();
        for (final NgramProperty ngram : mNgrams) {
            if (ngram.mNgramContext.getPrevWordCount() == 1) {
                bigrams.add(ngram.mTargetWord);
            }
        }
        return bigrams;
    }

    /**
     * Returns the probability stored in {@code mProbabilityInfo}.
     */
    public int getProbability() {
        return mProbabilityInfo.mProbability;
    }

    /**
     * Computes a hash from the word, its probability info, shortcut targets, n-grams and the
     * not-a-word / possibly-offensive flags. Null members are accepted.
     */
    private static int computeHashCode(WordProperty word) {
        return Arrays.hashCode(new Object[] {
                word.mWord,
                word.mProbabilityInfo,
                word.mShortcutTargets,
                word.mNgrams,
                word.mIsNotAWord,
                word.mIsPossiblyOffensive
        });
    }

    /**
     * Three-way comparison.
     *
     * Words with a higher probability sort first: this method returns a negative value when
     * this word has a higher probability than {@code w}, and a positive value when it has a
     * lower one. If they have the same probability, they are sorted in lexicographic order.
     *
     * Only the probability and the spelling are considered, so a result of 0 does not imply
     * {@link #equals(Object)}.
     */
    @Override
    public int compareTo(final WordProperty w) {
        if (getProbability() < w.getProbability()) return 1;
        if (getProbability() > w.getProbability()) return -1;
        return mWord.compareTo(w.mWord);
    }

    /**
     * Equality test.
     *
     * Words are equal if they have the same frequency, the same spellings, and the same
     * attributes.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if (!(o instanceof WordProperty)) return false;
        WordProperty w = (WordProperty)o;
        // Shortcut targets and n-grams may both be null, so compare them null-safely.
        return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
                && equals(mShortcutTargets, w.mShortcutTargets) && equals(mNgrams, w.mNgrams)
                && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
                && mHasNgrams == w.mHasNgrams && mHasShortcuts == w.mHasShortcuts;
    }

    /**
     * Null-safe list equality: two nulls are equal, a null and a non-null are not.
     */
    // TODO: Have a utility method like java.util.Objects.equals.
    private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
        if (null == a) {
            return null == b;
        }
        return a.equals(b);
    }

    /**
     * Returns the hash code, computing it on first use and caching it afterwards.
     *
     * A computed value of 0 is indistinguishable from "not computed yet" and is therefore
     * recomputed on every call.
     */
    @Override
    public int hashCode() {
        if (mHashCode == 0) {
            mHashCode = computeHashCode(this);
        }
        return mHashCode;
    }

    /**
     * Returns whether the probability differs from {@code Dictionary.NOT_A_PROBABILITY}.
     */
    @UsedForTesting
    public boolean isValid() {
        return getProbability() != Dictionary.NOT_A_PROBABILITY;
    }

    /**
     * Returns the representation produced by {@code CombinedFormatUtils.formatWordProperty}.
     */
    @Override
    public String toString() {
        return CombinedFormatUtils.formatWordProperty(this);
    }
}