/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;

import java.util.ArrayList;
import java.util.Arrays;

import javax.annotation.Nullable;

/**
 * Utility class for a word with a probability.
 *
 * This is chiefly used to iterate a dictionary.
 */
public final class WordProperty implements Comparable<WordProperty> {
    /** The word itself. */
    public final String mWord;
    /** Probability information attached to the word. */
    public final ProbabilityInfo mProbabilityInfo;
    /**
     * N-gram entries related to this word, or {@code null}. The native-data constructor stores
     * {@code null} when there are no entries; the testing constructor stores {@code null} only
     * when it is given a {@code null} bigram list.
     */
    public final ArrayList<NgramProperty> mNgrams;
    // TODO: Support mIsBeginningOfSentence.
    public final boolean mIsBeginningOfSentence;
    public final boolean mIsNotAWord;
    public final boolean mIsPossiblyOffensive;
    public final boolean mHasNgrams;

    // Lazily computed hash code; 0 means "not computed yet".
    private int mHashCode = 0;

    // TODO: Support n-gram.
    /**
     * Creates a word property from a word, its probability and an optional list of bigrams.
     *
     * Each bigram target is wrapped in an {@link NgramProperty} whose context is this word.
     * {@link #mIsBeginningOfSentence} is always {@code false} for instances built this way.
     * Note that an empty (non-null) {@code bigrams} list yields an empty, non-null
     * {@link #mNgrams}, whereas the native-data constructor stores {@code null} in that case.
     *
     * @param word the word.
     * @param probabilityInfo the probability information of the word.
     * @param bigrams the bigram targets of the word, or {@code null}.
     * @param isNotAWord value stored in {@link #mIsNotAWord}.
     * @param isPossiblyOffensive value stored in {@link #mIsPossiblyOffensive}.
     */
    @UsedForTesting
    public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
            @Nullable final ArrayList<WeightedString> bigrams,
            final boolean isNotAWord, final boolean isPossiblyOffensive) {
        mWord = word;
        mProbabilityInfo = probabilityInfo;
        if (null == bigrams) {
            mNgrams = null;
        } else {
            mNgrams = new ArrayList<>();
            // Every bigram shares the same one-word context: this word.
            final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
            for (final WeightedString bigramTarget : bigrams) {
                mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
            }
        }
        mIsBeginningOfSentence = false;
        mIsNotAWord = isNotAWord;
        mIsPossiblyOffensive = isPossiblyOffensive;
        mHasNgrams = bigrams != null && !bigrams.isEmpty();
    }

    /**
     * Builds a {@link ProbabilityInfo} from an int array, reading the probability, timestamp,
     * level and count at the {@code FORMAT_WORD_PROPERTY_*_INDEX} positions declared in
     * {@link BinaryDictionary}.
     */
    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
        return new ProbabilityInfo(
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
    }

    // Construct word property using information from native code.
    // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
    /**
     * Creates a word property from raw arrays.
     *
     * The four n-gram lists are read in parallel: index {@code i} of each list describes the
     * same n-gram entry, and the number of entries is taken from {@code ngramTargets}.
     *
     * @param codePoints null-terminated code point array of the word.
     * @param isNotAWord value stored in {@link #mIsNotAWord}.
     * @param isPossiblyOffensive value stored in {@link #mIsPossiblyOffensive}.
     * @param hasBigram value stored in {@link #mHasNgrams}.
     * @param isBeginningOfSentence value stored in {@link #mIsBeginningOfSentence}.
     * @param probabilityInfo probability data of the word, indexed by the
     *        {@code BinaryDictionary.FORMAT_WORD_PROPERTY_*_INDEX} constants.
     * @param ngramPrevWordsArray for each entry, the previous words as null-terminated code
     *        point arrays.
     * @param ngramPrevWordIsBeginningOfSentenceArray for each entry, flags telling which previous
     *        words are the beginning-of-sentence marker rather than a real word.
     * @param ngramTargets for each entry, the target word as a null-terminated code point array.
     * @param ngramProbabilityInfo for each entry, the probability data of the target.
     */
    public WordProperty(final int[] codePoints, final boolean isNotAWord,
            final boolean isPossiblyOffensive, final boolean hasBigram,
            final boolean isBeginningOfSentence, final int[] probabilityInfo,
            final ArrayList<int[][]> ngramPrevWordsArray,
            final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
            final ArrayList<int[]> ngramTargets,
            final ArrayList<int[]> ngramProbabilityInfo) {
        mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
        mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
        final ArrayList<NgramProperty> ngrams = new ArrayList<>();
        mIsBeginningOfSentence = isBeginningOfSentence;
        mIsNotAWord = isNotAWord;
        mIsPossiblyOffensive = isPossiblyOffensive;
        mHasNgrams = hasBigram;

        final int relatedNgramCount = ngramTargets.size();
        for (int i = 0; i < relatedNgramCount; i++) {
            final String ngramTargetString =
                    StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
            final WeightedString ngramTarget = new WeightedString(ngramTargetString,
                    createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
            final int[][] prevWords = ngramPrevWordsArray.get(i);
            final boolean[] isBeginningOfSentenceArray =
                    ngramPrevWordIsBeginningOfSentenceArray.get(i);
            final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
            for (int j = 0; j < prevWords.length; j++) {
                // A flagged slot uses the shared beginning-of-sentence marker instead of
                // decoding the code points.
                wordInfoArray[j] = isBeginningOfSentenceArray[j]
                        ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
                        : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
                                prevWords[j]));
            }
            final NgramContext ngramContext = new NgramContext(wordInfoArray);
            ngrams.add(new NgramProperty(ngramTarget, ngramContext));
        }
        // No entries is represented as null rather than as an empty list.
        mNgrams = ngrams.isEmpty() ? null : ngrams;
    }

    // TODO: Remove
    /**
     * Returns the targets of the n-grams whose context has exactly one previous word.
     *
     * @return the bigram targets (possibly empty), or {@code null} when {@link #mNgrams} is
     *         {@code null}.
     */
    @UsedForTesting
    public ArrayList<WeightedString> getBigrams() {
        if (null == mNgrams) {
            return null;
        }
        final ArrayList<WeightedString> bigrams = new ArrayList<>();
        for (final NgramProperty ngram : mNgrams) {
            if (ngram.mNgramContext.getPrevWordCount() == 1) {
                bigrams.add(ngram.mTargetWord);
            }
        }
        return bigrams;
    }

    /** Returns the probability stored in {@link #mProbabilityInfo}. */
    public int getProbability() {
        return mProbabilityInfo.mProbability;
    }

    /**
     * Computes the hash from the word, probability info, n-grams and the not-a-word and
     * possibly-offensive flags. {@link #mHasNgrams} is compared by {@link #equals(Object)} but
     * is not part of the hash.
     */
    private static int computeHashCode(WordProperty word) {
        return Arrays.hashCode(new Object[] {
                word.mWord,
                word.mProbabilityInfo,
                word.mNgrams,
                word.mIsNotAWord,
                word.mIsPossiblyOffensive
        });
    }

    /**
     * Three-way comparison.
     *
     * A Word x is greater than a word y if x has a higher frequency. If they have the same
     * frequency, they are sorted in lexicographic order.
     */
    @Override
    public int compareTo(final WordProperty w) {
        // Higher probability sorts first, hence the inverted signs.
        if (getProbability() < w.getProbability()) return 1;
        if (getProbability() > w.getProbability()) return -1;
        return mWord.compareTo(w.mWord);
    }

    /**
     * Equality test.
     *
     * Words are equal if they have the same frequency, the same spellings, and the same
     * attributes.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if (!(o instanceof WordProperty)) return false;
        WordProperty w = (WordProperty)o;
        return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
                && equals(mNgrams, w.mNgrams)
                && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
                && mHasNgrams == w.mHasNgrams;
    }

    // TODO: Have a utility method like java.util.Objects.equals.
    /** Null-safe list equality: two {@code null} lists are equal to each other. */
    private static boolean equals(final ArrayList<NgramProperty> a,
            final ArrayList<NgramProperty> b) {
        if (null == a) {
            return null == b;
        }
        return a.equals(b);
    }

    /**
     * Returns the hash code, computing it on first use and caching it. A computed value of 0 is
     * indistinguishable from "not computed" and is therefore recomputed on every call.
     */
    @Override
    public int hashCode() {
        if (mHashCode == 0) {
            mHashCode = computeHashCode(this);
        }
        return mHashCode;
    }

    /** Returns whether the probability differs from {@link Dictionary#NOT_A_PROBABILITY}. */
    @UsedForTesting
    public boolean isValid() {
        return getProbability() != Dictionary.NOT_A_PROBABILITY;
    }

    /** Formats this word property with {@link CombinedFormatUtils#formatWordProperty}. */
    @Override
    public String toString() {
        return CombinedFormatUtils.formatWordProperty(this);
    }
}