1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.annotations.UsedForTesting;
20import com.android.inputmethod.latin.BinaryDictionary;
21import com.android.inputmethod.latin.Dictionary;
22import com.android.inputmethod.latin.NgramContext;
23import com.android.inputmethod.latin.NgramContext.WordInfo;
24import com.android.inputmethod.latin.common.StringUtils;
25import com.android.inputmethod.latin.utils.CombinedFormatUtils;
26
27import java.util.ArrayList;
28import java.util.Arrays;
29
30import javax.annotation.Nullable;
31
32/**
33 * Utility class for a word with a probability.
34 *
35 * This is chiefly used to iterate a dictionary.
36 */
37public final class WordProperty implements Comparable<WordProperty> {
38    public final String mWord;
39    public final ProbabilityInfo mProbabilityInfo;
40    public final ArrayList<NgramProperty> mNgrams;
41    // TODO: Support mIsBeginningOfSentence.
42    public final boolean mIsBeginningOfSentence;
43    public final boolean mIsNotAWord;
44    public final boolean mIsPossiblyOffensive;
45    public final boolean mHasNgrams;
46
47    private int mHashCode = 0;
48
49    // TODO: Support n-gram.
50    @UsedForTesting
51    public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
52            @Nullable final ArrayList<WeightedString> bigrams,
53            final boolean isNotAWord, final boolean isPossiblyOffensive) {
54        mWord = word;
55        mProbabilityInfo = probabilityInfo;
56        if (null == bigrams) {
57            mNgrams = null;
58        } else {
59            mNgrams = new ArrayList<>();
60            final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
61            for (final WeightedString bigramTarget : bigrams) {
62                mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
63            }
64        }
65        mIsBeginningOfSentence = false;
66        mIsNotAWord = isNotAWord;
67        mIsPossiblyOffensive = isPossiblyOffensive;
68        mHasNgrams = bigrams != null && !bigrams.isEmpty();
69    }
70
71    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
72      return new ProbabilityInfo(
73              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
74              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
75              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
76              probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
77    }
78
79    // Construct word property using information from native code.
80    // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
81    public WordProperty(final int[] codePoints, final boolean isNotAWord,
82            final boolean isPossiblyOffensive, final boolean hasBigram,
83            final boolean isBeginningOfSentence, final int[] probabilityInfo,
84            final ArrayList<int[][]> ngramPrevWordsArray,
85            final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
86            final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo) {
87        mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
88        mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
89        final ArrayList<NgramProperty> ngrams = new ArrayList<>();
90        mIsBeginningOfSentence = isBeginningOfSentence;
91        mIsNotAWord = isNotAWord;
92        mIsPossiblyOffensive = isPossiblyOffensive;
93        mHasNgrams = hasBigram;
94
95        final int relatedNgramCount = ngramTargets.size();
96        for (int i = 0; i < relatedNgramCount; i++) {
97            final String ngramTargetString =
98                    StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
99            final WeightedString ngramTarget = new WeightedString(ngramTargetString,
100                    createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
101            final int[][] prevWords = ngramPrevWordsArray.get(i);
102            final boolean[] isBeginningOfSentenceArray =
103                    ngramPrevWordIsBeginningOfSentenceArray.get(i);
104            final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
105            for (int j = 0; j < prevWords.length; j++) {
106                wordInfoArray[j] = isBeginningOfSentenceArray[j]
107                        ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
108                        : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
109                                prevWords[j]));
110            }
111            final NgramContext ngramContext = new NgramContext(wordInfoArray);
112            ngrams.add(new NgramProperty(ngramTarget, ngramContext));
113        }
114        mNgrams = ngrams.isEmpty() ? null : ngrams;
115    }
116
117    // TODO: Remove
118    @UsedForTesting
119    public ArrayList<WeightedString> getBigrams() {
120        if (null == mNgrams) {
121            return null;
122        }
123        final ArrayList<WeightedString> bigrams = new ArrayList<>();
124        for (final NgramProperty ngram : mNgrams) {
125            if (ngram.mNgramContext.getPrevWordCount() == 1) {
126                bigrams.add(ngram.mTargetWord);
127            }
128        }
129        return bigrams;
130    }
131
132    public int getProbability() {
133        return mProbabilityInfo.mProbability;
134    }
135
136    private static int computeHashCode(WordProperty word) {
137        return Arrays.hashCode(new Object[] {
138                word.mWord,
139                word.mProbabilityInfo,
140                word.mNgrams,
141                word.mIsNotAWord,
142                word.mIsPossiblyOffensive
143        });
144    }
145
146    /**
147     * Three-way comparison.
148     *
149     * A Word x is greater than a word y if x has a higher frequency. If they have the same
150     * frequency, they are sorted in lexicographic order.
151     */
152    @Override
153    public int compareTo(final WordProperty w) {
154        if (getProbability() < w.getProbability()) return 1;
155        if (getProbability() > w.getProbability()) return -1;
156        return mWord.compareTo(w.mWord);
157    }
158
159    /**
160     * Equality test.
161     *
162     * Words are equal if they have the same frequency, the same spellings, and the same
163     * attributes.
164     */
165    @Override
166    public boolean equals(Object o) {
167        if (o == this) return true;
168        if (!(o instanceof WordProperty)) return false;
169        WordProperty w = (WordProperty)o;
170        return mProbabilityInfo.equals(w.mProbabilityInfo)
171                && mWord.equals(w.mWord) && equals(mNgrams, w.mNgrams)
172                && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
173                && mHasNgrams == w.mHasNgrams;
174    }
175
176    // TDOO: Have a utility method like java.util.Objects.equals.
177    private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
178        if (null == a) {
179            return null == b;
180        }
181        return a.equals(b);
182    }
183
184    @Override
185    public int hashCode() {
186        if (mHashCode == 0) {
187            mHashCode = computeHashCode(this);
188        }
189        return mHashCode;
190    }
191
192    @UsedForTesting
193    public boolean isValid() {
194        return getProbability() != Dictionary.NOT_A_PROBABILITY;
195    }
196
197    @Override
198    public String toString() {
199        return CombinedFormatUtils.formatWordProperty(this);
200    }
201}
202