WordProperty.java revision 4beeb9253a06482299e0c67467531d30436a02fc
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.annotations.UsedForTesting;
20import com.android.inputmethod.latin.BinaryDictionary;
21import com.android.inputmethod.latin.Dictionary;
22import com.android.inputmethod.latin.NgramContext;
23import com.android.inputmethod.latin.NgramContext.WordInfo;
24import com.android.inputmethod.latin.common.StringUtils;
25import com.android.inputmethod.latin.utils.CombinedFormatUtils;
26
27import java.util.ArrayList;
28import java.util.Arrays;
29
30import javax.annotation.Nullable;
31
32/**
33 * Utility class for a word with a probability.
34 *
35 * This is chiefly used to iterate a dictionary.
36 */
37public final class WordProperty implements Comparable<WordProperty> {
38    public final String mWord;
39    public final ProbabilityInfo mProbabilityInfo;
40    public final ArrayList<WeightedString> mShortcutTargets;
41    public final ArrayList<NgramProperty> mNgrams;
42    // TODO: Support mIsBeginningOfSentence.
43    public final boolean mIsBeginningOfSentence;
44    public final boolean mIsNotAWord;
45    public final boolean mIsPossiblyOffensive;
46    public final boolean mHasShortcuts;
47    public final boolean mHasNgrams;
48
49    private int mHashCode = 0;
50
51    // TODO: Support n-gram.
52    @UsedForTesting
53    public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
54            final ArrayList<WeightedString> shortcutTargets,
55            @Nullable final ArrayList<WeightedString> bigrams,
56            final boolean isNotAWord, final boolean isPossiblyOffensive) {
57        mWord = word;
58        mProbabilityInfo = probabilityInfo;
59        mShortcutTargets = shortcutTargets;
60        if (null == bigrams) {
61            mNgrams = null;
62        } else {
63            mNgrams = new ArrayList<>();
64            final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
65            for (final WeightedString bigramTarget : bigrams) {
66                mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
67            }
68        }
69        mIsBeginningOfSentence = false;
70        mIsNotAWord = isNotAWord;
71        mIsPossiblyOffensive = isPossiblyOffensive;
72        mHasNgrams = bigrams != null && !bigrams.isEmpty();
73        mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
74    }
75
76    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
77      return new ProbabilityInfo(
78              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
79              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
80              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
81              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
82    }
83
84    // Construct word property using information from native code.
85    // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
86    public WordProperty(final int[] codePoints, final boolean isNotAWord,
87            final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
88            final boolean isBeginningOfSentence, final int[] probabilityInfo,
89            final ArrayList<int[][]> ngramPrevWordsArray,
90            final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
91            final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo,
92            final ArrayList<int[]> shortcutTargets,
93            final ArrayList<Integer> shortcutProbabilities) {
94        mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
95        mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
96        mShortcutTargets = new ArrayList<>();
97        final ArrayList<NgramProperty> ngrams = new ArrayList<>();
98        mIsBeginningOfSentence = isBeginningOfSentence;
99        mIsNotAWord = isNotAWord;
100        mIsPossiblyOffensive = isPossiblyOffensive;
101        mHasShortcuts = hasShortcuts;
102        mHasNgrams = hasBigram;
103
104        final int relatedNgramCount = ngramTargets.size();
105        for (int i = 0; i < relatedNgramCount; i++) {
106            final String ngramTargetString =
107                    StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
108            final WeightedString ngramTarget = new WeightedString(ngramTargetString,
109                    createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
110            final int[][] prevWords = ngramPrevWordsArray.get(i);
111            final boolean[] isBeginningOfSentenceArray =
112                    ngramPrevWordIsBeginningOfSentenceArray.get(i);
113            final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
114            for (int j = 0; j < prevWords.length; j++) {
115                wordInfoArray[j] = isBeginningOfSentenceArray[j]
116                        ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
117                        : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
118                                prevWords[j]));
119            }
120            final NgramContext ngramContext = new NgramContext(wordInfoArray);
121            ngrams.add(new NgramProperty(ngramTarget, ngramContext));
122        }
123        mNgrams = ngrams.isEmpty() ? null : ngrams;
124
125        final int shortcutTargetCount = shortcutTargets.size();
126        for (int i = 0; i < shortcutTargetCount; i++) {
127            final String shortcutTargetString =
128                    StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
129            mShortcutTargets.add(
130                    new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
131        }
132    }
133
134    // TODO: Remove
135    @UsedForTesting
136    public ArrayList<WeightedString> getBigrams() {
137        if (null == mNgrams) {
138            return null;
139        }
140        final ArrayList<WeightedString> bigrams = new ArrayList<>();
141        for (final NgramProperty ngram : mNgrams) {
142            if (ngram.mNgramContext.getPrevWordCount() == 1) {
143                bigrams.add(ngram.mTargetWord);
144            }
145        }
146        return bigrams;
147    }
148
149    public int getProbability() {
150        return mProbabilityInfo.mProbability;
151    }
152
153    private static int computeHashCode(WordProperty word) {
154        return Arrays.hashCode(new Object[] {
155                word.mWord,
156                word.mProbabilityInfo,
157                word.mShortcutTargets,
158                word.mNgrams,
159                word.mIsNotAWord,
160                word.mIsPossiblyOffensive
161        });
162    }
163
164    /**
165     * Three-way comparison.
166     *
167     * A Word x is greater than a word y if x has a higher frequency. If they have the same
168     * frequency, they are sorted in lexicographic order.
169     */
170    @Override
171    public int compareTo(final WordProperty w) {
172        if (getProbability() < w.getProbability()) return 1;
173        if (getProbability() > w.getProbability()) return -1;
174        return mWord.compareTo(w.mWord);
175    }
176
177    /**
178     * Equality test.
179     *
180     * Words are equal if they have the same frequency, the same spellings, and the same
181     * attributes.
182     */
183    @Override
184    public boolean equals(Object o) {
185        if (o == this) return true;
186        if (!(o instanceof WordProperty)) return false;
187        WordProperty w = (WordProperty)o;
188        return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
189                && mShortcutTargets.equals(w.mShortcutTargets) && equals(mNgrams, w.mNgrams)
190                && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
191                && mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
192    }
193
194    // TDOO: Have a utility method like java.util.Objects.equals.
195    private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
196        if (null == a) {
197            return null == b;
198        }
199        return a.equals(b);
200    }
201
202    @Override
203    public int hashCode() {
204        if (mHashCode == 0) {
205            mHashCode = computeHashCode(this);
206        }
207        return mHashCode;
208    }
209
210    @UsedForTesting
211    public boolean isValid() {
212        return getProbability() != Dictionary.NOT_A_PROBABILITY;
213    }
214
215    @Override
216    public String toString() {
217        return CombinedFormatUtils.formatWordProperty(this);
218    }
219}
220