com_android_inputmethod_latin_BinaryDictionary.cpp revision 05b1e0d42f9f103516103d4d33e61862c0851e9d
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "LatinIME: jni: BinaryDictionary"
18
19#include "com_android_inputmethod_latin_BinaryDictionary.h"
20
21#include <cstring> // for memset()
22#include <vector>
23
24#include "defines.h"
25#include "jni.h"
26#include "jni_common.h"
27#include "suggest/core/dictionary/dictionary.h"
28#include "suggest/core/dictionary/property/unigram_property.h"
29#include "suggest/core/dictionary/property/word_property.h"
30#include "suggest/core/result/suggestion_results.h"
31#include "suggest/core/session/prev_words_info.h"
32#include "suggest/core/suggest_options.h"
33#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
34#include "utils/char_utils.h"
35#include "utils/jni_data_utils.h"
36#include "utils/log_utils.h"
37#include "utils/time_keeper.h"
38
39namespace latinime {
40
// Forward declaration: this file only passes ProximityInfo around by pointer.
class ProximityInfo;
42
43static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
44        jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
45    PROF_OPEN;
46    PROF_START(66);
47    const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
48    if (sourceDirUtf8Length <= 0) {
49        AKLOGE("DICT: Can't get sourceDir string");
50        return 0;
51    }
52    char sourceDirChars[sourceDirUtf8Length + 1];
53    env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
54    sourceDirChars[sourceDirUtf8Length] = '\0';
55    DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy(
56            DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
57                    sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
58                    isUpdatable == JNI_TRUE));
59    if (!dictionaryStructureWithBufferPolicy) {
60        return 0;
61    }
62
63    Dictionary *const dictionary =
64            new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
65    PROF_END(66);
66    PROF_CLOSE;
67    return reinterpret_cast<jlong>(dictionary);
68}
69
70static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz,
71        jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray,
72        jobjectArray attributeValueStringArray) {
73    const jsize localeUtf8Length = env->GetStringUTFLength(locale);
74    char localeChars[localeUtf8Length + 1];
75    env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars);
76    localeChars[localeUtf8Length] = '\0';
77    std::vector<int> localeCodePoints;
78    HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints);
79    const int keyCount = env->GetArrayLength(attributeKeyStringArray);
80    const int valueCount = env->GetArrayLength(attributeValueStringArray);
81    if (keyCount != valueCount) {
82        return false;
83    }
84    DictionaryHeaderStructurePolicy::AttributeMap attributeMap =
85            JniDataUtils::constructAttributeMap(env, attributeKeyStringArray,
86                    attributeValueStringArray);
87    DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
88            DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
89                    formatVersion, localeCodePoints, &attributeMap);
90    if (!dictionaryStructureWithBufferPolicy) {
91        return 0;
92    }
93    Dictionary *const dictionary =
94            new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
95    return reinterpret_cast<jlong>(dictionary);
96}
97
98static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
99        jstring filePath) {
100    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
101    if (!dictionary) return false;
102    const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
103    char filePathChars[filePathUtf8Length + 1];
104    env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
105    filePathChars[filePathUtf8Length] = '\0';
106    return dictionary->flush(filePathChars);
107}
108
109static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
110        jlong dict, jboolean mindsBlockByGC) {
111    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
112    if (!dictionary) return false;
113    return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
114}
115
116static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
117        jstring filePath) {
118    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
119    if (!dictionary) return false;
120    const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
121    char filePathChars[filePathUtf8Length + 1];
122    env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
123    filePathChars[filePathUtf8Length] = '\0';
124    return dictionary->flushWithGC(filePathChars);
125}
126
127static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
128    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
129    if (!dictionary) return;
130    delete dictionary;
131}
132
133static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict,
134        jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys,
135        jobject outAttributeValues) {
136    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
137    if (!dictionary) return;
138    const DictionaryHeaderStructurePolicy *const headerPolicy =
139            dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
140    JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize());
141    JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */,
142            headerPolicy->getFormatVersionNumber());
143    // Output attribute map
144    jclass arrayListClass = env->FindClass("java/util/ArrayList");
145    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
146    const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap =
147            headerPolicy->getAttributeMap();
148    for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin();
149            it != attributeMap->end(); ++it) {
150        // Output key
151        jintArray keyCodePointArray = env->NewIntArray(it->first.size());
152        JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */,
153                it->first.size(), it->first.data(), it->first.size(),
154                false /* needsNullTermination */);
155        env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray);
156        env->DeleteLocalRef(keyCodePointArray);
157        // Output value
158        jintArray valueCodePointArray = env->NewIntArray(it->second.size());
159        JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */,
160                it->second.size(), it->second.data(), it->second.size(),
161                false /* needsNullTermination */);
162        env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray);
163        env->DeleteLocalRef(valueCodePointArray);
164    }
165    env->DeleteLocalRef(arrayListClass);
166    return;
167}
168
169static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
170    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
171    if (!dictionary) return 0;
172    const DictionaryHeaderStructurePolicy *const headerPolicy =
173            dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
174    return headerPolicy->getFormatVersionNumber();
175}
176
177static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
178        jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
179        jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
180        jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
181        jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
182        jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
183        jintArray outSpaceIndicesArray, jintArray outTypesArray,
184        jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
185    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
186    // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
187    JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
188    if (!dictionary) {
189        return;
190    }
191    ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
192    DicTraverseSession *traverseSession =
193            reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
194    if (!traverseSession) {
195        return;
196    }
197    // Input values
198    int xCoordinates[inputSize];
199    int yCoordinates[inputSize];
200    int times[inputSize];
201    int pointerIds[inputSize];
202    const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
203    int inputCodePoints[inputCodePointsLength];
204    env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
205    env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
206    env->GetIntArrayRegion(timesArray, 0, inputSize, times);
207    env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
208    env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
209
210    const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
211    int options[numberOfOptions];
212    env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
213    SuggestOptions givenSuggestOptions(options, numberOfOptions);
214
215    // Output values
216    /* By the way, let's check the output array length here to make sure */
217    const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray);
218    if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
219        AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
220        ASSERT(false);
221        return;
222    }
223    const jsize scoresLength = env->GetArrayLength(outScoresArray);
224    if (scoresLength != MAX_RESULTS) {
225        AKLOGE("Invalid scoresLength: %d", scoresLength);
226        ASSERT(false);
227        return;
228    }
229    const jsize outputAutoCommitFirstWordConfidenceLength =
230            env->GetArrayLength(outAutoCommitFirstWordConfidenceArray);
231    ASSERT(outputAutoCommitFirstWordConfidenceLength == 1);
232    if (outputAutoCommitFirstWordConfidenceLength != 1) {
233        // We only use the first result, as obviously we will only ever autocommit the first one
234        AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d",
235                outputAutoCommitFirstWordConfidenceLength);
236        ASSERT(false);
237        return;
238    }
239    float languageWeight;
240    env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
241    SuggestionResults suggestionResults(MAX_RESULTS);
242    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
243            prevWordCodePointArrays, isBeginningOfSentenceArray);
244    if (givenSuggestOptions.isGesture() || inputSize > 0) {
245        // TODO: Use SuggestionResults to return suggestions.
246        dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
247                times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
248                &givenSuggestOptions, languageWeight, &suggestionResults);
249    } else {
250        dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
251    }
252    suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
253            outScoresArray, outSpaceIndicesArray, outTypesArray,
254            outAutoCommitFirstWordConfidenceArray, inOutLanguageWeight);
255}
256
257static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
258        jintArray word) {
259    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
260    if (!dictionary) return NOT_A_PROBABILITY;
261    const jsize wordLength = env->GetArrayLength(word);
262    int codePoints[wordLength];
263    env->GetIntArrayRegion(word, 0, wordLength, codePoints);
264    return dictionary->getProbability(codePoints, wordLength);
265}
266
267static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
268        JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
269    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
270    if (!dictionary) return NOT_A_PROBABILITY;
271    const jsize wordLength = env->GetArrayLength(word);
272    int codePoints[wordLength];
273    env->GetIntArrayRegion(word, 0, wordLength, codePoints);
274    return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
275}
276
277static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
278        jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
279    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
280    if (!dictionary) return JNI_FALSE;
281    const jsize word0Length = env->GetArrayLength(word0);
282    const jsize word1Length = env->GetArrayLength(word1);
283    int word0CodePoints[word0Length];
284    int word1CodePoints[word1Length];
285    env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
286    env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
287    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
288    return dictionary->getNgramProbability(&prevWordsInfo, word1CodePoints, word1Length);
289}
290
291// Method to iterate all words in the dictionary for makedict.
292// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
293// the dictionary does not have a next word.
294static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
295        jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
296    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
297    if (!dictionary) return 0;
298    const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
299    if (codePointBufSize != MAX_WORD_LENGTH) {
300        AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
301        ASSERT(false);
302        return 0;
303    }
304    int wordCodePoints[codePointBufSize];
305    int wordCodePointCount = 0;
306    const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
307            &wordCodePointCount);
308    JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
309            MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
310            false /* needsNullTermination */);
311    bool isBeginningOfSentence = false;
312    if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
313        isBeginningOfSentence = true;
314    }
315    JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
316            isBeginningOfSentence);
317    return nextToken;
318}
319
320static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
321        jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
322        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
323        jobject outBigramProbabilityInfo, jobject outShortcutTargets,
324        jobject outShortcutProbabilities) {
325    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
326    if (!dictionary) return;
327    const jsize wordLength = env->GetArrayLength(word);
328    if (wordLength > MAX_WORD_LENGTH) {
329        AKLOGE("Invalid wordLength: %d", wordLength);
330        return;
331    }
332    int wordCodePoints[MAX_WORD_LENGTH];
333    env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
334    int codePointCount = wordLength;
335    if (isBeginningOfSentence) {
336        codePointCount = CharUtils::attachBeginningOfSentenceMarker(
337                wordCodePoints, wordLength, MAX_WORD_LENGTH);
338        if (codePointCount < 0) {
339            AKLOGE("Cannot attach Beginning-of-Sentence marker.");
340            return;
341        }
342    }
343    const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
344    wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
345            outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
346            outShortcutProbabilities);
347}
348
349static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
350        jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
351        jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
352        jint timestamp) {
353    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
354    if (!dictionary) {
355        return false;
356    }
357    jsize codePointCount = env->GetArrayLength(word);
358    int codePoints[codePointCount];
359    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
360    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
361    std::vector<int> shortcutTargetCodePoints;
362    JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
363    if (!shortcutTargetCodePoints.empty()) {
364        shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
365    }
366    // Use 1 for count to indicate the word has inputted.
367    const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
368            isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
369    return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
370}
371
372static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
373        jintArray word) {
374    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
375    if (!dictionary) {
376        return false;
377    }
378    jsize codePointCount = env->GetArrayLength(word);
379    int codePoints[codePointCount];
380    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
381    return dictionary->removeUnigramEntry(codePoints, codePointCount);
382}
383
384static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
385        jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
386        jintArray word, jint probability, jint timestamp) {
387    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
388    if (!dictionary) {
389        return false;
390    }
391    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
392            prevWordCodePointArrays, isBeginningOfSentenceArray);
393    jsize wordLength = env->GetArrayLength(word);
394    int wordCodePoints[wordLength];
395    env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
396    const std::vector<int> bigramTargetCodePoints(
397            wordCodePoints, wordCodePoints + wordLength);
398    // Use 1 for count to indicate the bigram has inputted.
399    const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
400            timestamp, 0 /* level */, 1 /* count */);
401    return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
402}
403
404static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
405        jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
406        jintArray word) {
407    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
408    if (!dictionary) {
409        return false;
410    }
411    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
412            prevWordCodePointArrays, isBeginningOfSentenceArray);
413    jsize wordLength = env->GetArrayLength(word);
414    int wordCodePoints[wordLength];
415    env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
416    return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
417}
418
419// Returns how many language model params are processed.
420static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
421        jlong dict, jobjectArray languageModelParams, jint startIndex) {
422    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
423    if (!dictionary) {
424        return 0;
425    }
426    jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
427    if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) {
428        return 0;
429    }
430    jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
431    jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
432    env->DeleteLocalRef(languageModelParam);
433
434    jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
435    jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
436    jfieldID unigramProbabilityFieldId =
437            env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
438    jfieldID bigramProbabilityFieldId =
439            env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
440    jfieldID timestampFieldId =
441            env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
442    jfieldID shortcutTargetFieldId =
443            env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
444    jfieldID shortcutProbabilityFieldId =
445            env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
446    jfieldID isNotAWordFieldId =
447            env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
448    jfieldID isBlacklistedFieldId =
449            env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
450    env->DeleteLocalRef(languageModelParamClass);
451
452    for (int i = startIndex; i < languageModelParamCount; ++i) {
453        jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
454        // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the
455        // other hand, word0 can be null and then it means the set of params doesn't contain bigram
456        // information.
457        jintArray word0 = static_cast<jintArray>(
458                env->GetObjectField(languageModelParam, word0FieldId));
459        jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
460        int word0CodePoints[word0Length];
461        if (word0) {
462            env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
463        }
464        jintArray word1 = static_cast<jintArray>(
465                env->GetObjectField(languageModelParam, word1FieldId));
466        jsize word1Length = env->GetArrayLength(word1);
467        int word1CodePoints[word1Length];
468        env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
469        jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
470        jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
471        jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
472        jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
473        jintArray shortcutTarget = static_cast<jintArray>(
474                env->GetObjectField(languageModelParam, shortcutTargetFieldId));
475        std::vector<UnigramProperty::ShortcutProperty> shortcuts;
476        std::vector<int> shortcutTargetCodePoints;
477        JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
478        if (!shortcutTargetCodePoints.empty()) {
479            jint shortcutProbability =
480                    env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
481            shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
482        }
483        // Use 1 for count to indicate the word has inputted.
484        const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
485                isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
486                &shortcuts);
487        dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
488        if (word0) {
489            jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
490            const std::vector<int> bigramTargetCodePoints(
491                    word1CodePoints, word1CodePoints + word1Length);
492            // Use 1 for count to indicate the bigram has inputted.
493            const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
494                    timestamp, 0 /* level */, 1 /* count */);
495            const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
496                    false /* isBeginningOfSentence */);
497            dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
498        }
499        if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
500            return i + 1;
501        }
502        env->DeleteLocalRef(word0);
503        env->DeleteLocalRef(word1);
504        env->DeleteLocalRef(shortcutTarget);
505        env->DeleteLocalRef(languageModelParam);
506    }
507    return languageModelParamCount;
508}
509
510static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
511        jstring query) {
512    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
513    if (!dictionary) {
514        return env->NewStringUTF("");
515    }
516    const jsize queryUtf8Length = env->GetStringUTFLength(query);
517    char queryChars[queryUtf8Length + 1];
518    env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
519    queryChars[queryUtf8Length] = '\0';
520    static const int GET_PROPERTY_RESULT_LENGTH = 100;
521    char resultChars[GET_PROPERTY_RESULT_LENGTH];
522    resultChars[0] = '\0';
523    dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
524    return env->NewStringUTF(resultChars);
525}
526
527static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) {
528    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
529    if (!dictionary) {
530        return false;
531    }
532    return dictionary->getDictionaryStructurePolicy()->isCorrupted();
533}
534
535static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
536        DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
537        const char *const dictFilePath) {
538    structurePolicy->flushWithGC(dictFilePath);
539    structurePolicy.release();
540    return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
541            dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
542}
543
544static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
545        jstring dictFilePath, jlong newFormatVersion) {
546    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
547    if (!dictionary) {
548        return false;
549    }
550    const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
551    char dictFilePathChars[filePathUtf8Length + 1];
552    env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
553    dictFilePathChars[filePathUtf8Length] = '\0';
554
555    const DictionaryHeaderStructurePolicy *const headerPolicy =
556            dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
557    DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
558            DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
559                    newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
560    if (!dictionaryStructureWithBufferPolicy) {
561        LogUtils::logToJava(env, "Cannot migrate header.");
562        return false;
563    }
564
565    int wordCodePoints[MAX_WORD_LENGTH];
566    int wordCodePointCount = 0;
567    int token = 0;
568    // Add unigrams.
569    do {
570        token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
571        const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
572                wordCodePointCount);
573        if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
574            // Skip beginning-of-sentence unigram.
575            continue;
576        }
577        if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
578            dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
579                    std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
580            if (!dictionaryStructureWithBufferPolicy) {
581                LogUtils::logToJava(env, "Cannot open dict after GC.");
582                return false;
583            }
584        }
585        if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
586                wordCodePointCount, wordProperty.getUnigramProperty())) {
587            LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
588            return false;
589        }
590    } while (token != 0);
591
592    // Add bigrams.
593    do {
594        token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
595        const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
596                wordCodePointCount);
597        if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
598            dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
599                    std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
600            if (!dictionaryStructureWithBufferPolicy) {
601                LogUtils::logToJava(env, "Cannot open dict after GC.");
602                return false;
603            }
604        }
605        const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
606                wordProperty.getUnigramProperty()->representsBeginningOfSentence());
607        for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
608            if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
609                    &bigramProperty)) {
610                LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
611                return false;
612            }
613        }
614    } while (token != 0);
615    // Save to File.
616    dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
617    return true;
618}
619
620static const JNINativeMethod sMethods[] = {
621    {
622        const_cast<char *>("openNative"),
623        const_cast<char *>("(Ljava/lang/String;JJZ)J"),
624        reinterpret_cast<void *>(latinime_BinaryDictionary_open)
625    },
626    {
627        const_cast<char *>("createOnMemoryNative"),
628        const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"),
629        reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory)
630    },
631    {
632        const_cast<char *>("closeNative"),
633        const_cast<char *>("(J)V"),
634        reinterpret_cast<void *>(latinime_BinaryDictionary_close)
635    },
636    {
637        const_cast<char *>("getFormatVersionNative"),
638        const_cast<char *>("(J)I"),
639        reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
640    },
641    {
642        const_cast<char *>("getHeaderInfoNative"),
643        const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
644        reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo)
645    },
646    {
647        const_cast<char *>("flushNative"),
648        const_cast<char *>("(JLjava/lang/String;)Z"),
649        reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
650    },
651    {
652        const_cast<char *>("needsToRunGCNative"),
653        const_cast<char *>("(JZ)Z"),
654        reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
655    },
656    {
657        const_cast<char *>("flushWithGCNative"),
658        const_cast<char *>("(JLjava/lang/String;)Z"),
659        reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
660    },
661    {
662        const_cast<char *>("getSuggestionsNative"),
663        const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"),
664        reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
665    },
666    {
667        const_cast<char *>("getProbabilityNative"),
668        const_cast<char *>("(J[I)I"),
669        reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
670    },
671    {
672        const_cast<char *>("getMaxProbabilityOfExactMatchesNative"),
673        const_cast<char *>("(J[I)I"),
674        reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
675    },
676    {
677        const_cast<char *>("getNgramProbabilityNative"),
678        const_cast<char *>("(J[[I[Z[I)I"),
679        reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
680    },
681    {
682        const_cast<char *>("getWordPropertyNative"),
683        const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
684                "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
685        reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
686    },
687    {
688        const_cast<char *>("getNextWordNative"),
689        const_cast<char *>("(JI[I[Z)I"),
690        reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
691    },
692    {
693        const_cast<char *>("addUnigramEntryNative"),
694        const_cast<char *>("(J[II[IIZZZI)Z"),
695        reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry)
696    },
697    {
698        const_cast<char *>("removeUnigramEntryNative"),
699        const_cast<char *>("(J[I)Z"),
700        reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry)
701    },
702    {
703        const_cast<char *>("addNgramEntryNative"),
704        const_cast<char *>("(J[[I[Z[III)Z"),
705        reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
706    },
707    {
708        const_cast<char *>("removeNgramEntryNative"),
709        const_cast<char *>("(J[[I[Z[I)Z"),
710        reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
711    },
712    {
713        const_cast<char *>("addMultipleDictionaryEntriesNative"),
714        const_cast<char *>(
715                "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
716        reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
717    },
718    {
719        const_cast<char *>("getPropertyNative"),
720        const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
721        reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
722    },
723    {
724        const_cast<char *>("isCorruptedNative"),
725        const_cast<char *>("(J)Z"),
726        reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
727    },
728    {
729        const_cast<char *>("migrateNative"),
730        const_cast<char *>("(JLjava/lang/String;J)Z"),
731        reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
732    }
733};
734
735int register_BinaryDictionary(JNIEnv *env) {
736    const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
737    return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
738}
739} // namespace latinime
740