com_android_inputmethod_latin_BinaryDictionary.cpp revision 66facd37ddf8fc23ed2508a114c446147aaca724
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "LatinIME: jni: BinaryDictionary"
18
19#include "com_android_inputmethod_latin_BinaryDictionary.h"
20
21#include <cerrno>
22#include <cstring> // for memset()
23#include <fcntl.h>
24#include <sys/mman.h>
25#include <unistd.h>
26
27#include "defines.h"
28#include "jni.h"
29#include "jni_common.h"
30#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
31#include "suggest/core/dictionary/binary_dictionary_info.h"
32#include "suggest/core/dictionary/dictionary.h"
33#include "suggest/core/suggest_options.h"
34#include "utils/autocorrection_threshold_utils.h"
35
36namespace latinime {
37
38class ProximityInfo;
39
40// Helper method
41static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) {
42    int ret = munmap(const_cast<void *>(dictBuf), length);
43    if (ret != 0) {
44        AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
45    }
46    ret = close(fd);
47    if (ret != 0) {
48        AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
49    }
50}
51
52static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
53        jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
54    PROF_OPEN;
55    PROF_START(66);
56    // TODO: Move dictionary buffer handling to policyimpl.
57    const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
58    if (sourceDirUtf8Length <= 0) {
59        AKLOGE("DICT: Can't get sourceDir string");
60        return 0;
61    }
62    char sourceDirChars[sourceDirUtf8Length + 1];
63    env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
64    sourceDirChars[sourceDirUtf8Length] = '\0';
65    int fd = 0;
66    void *dictBuf = 0;
67    int offset = 0;
68    const bool updatableMmap = (isUpdatable == JNI_TRUE);
69    const int openMode = updatableMmap ? O_RDWR : O_RDONLY;
70    fd = open(sourceDirChars, openMode);
71    if (fd < 0) {
72        AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
73        return 0;
74    }
75    int pagesize = getpagesize();
76    offset = static_cast<int>(dictOffset) % pagesize;
77    int adjDictOffset = static_cast<int>(dictOffset) - offset;
78    int adjDictSize = static_cast<int>(dictSize) + offset;
79    const int protMode = updatableMmap ? PROT_READ | PROT_WRITE : PROT_READ;
80    dictBuf = mmap(0, adjDictSize, protMode, MAP_PRIVATE, fd, adjDictOffset);
81    if (dictBuf == MAP_FAILED) {
82        AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
83        return 0;
84    }
85    dictBuf = static_cast<char *>(dictBuf) + offset;
86    if (!dictBuf) {
87        AKLOGE("DICT: dictBuf is null");
88        return 0;
89    }
90    Dictionary *dictionary = 0;
91    if (BinaryDictionaryFormatUtils::UNKNOWN_VERSION
92            == BinaryDictionaryFormatUtils::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
93                    static_cast<int>(dictSize))) {
94        AKLOGE("DICT: dictionary format is unknown, bad magic number");
95        releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
96    } else {
97        dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
98                updatableMmap);
99    }
100    PROF_END(66);
101    PROF_CLOSE;
102    return reinterpret_cast<jlong>(dictionary);
103}
104
105static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
106    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
107    if (!dictionary) return;
108    const BinaryDictionaryInfo *const binaryDictionaryInfo = dictionary->getBinaryDictionaryInfo();
109    const int dictBufOffset = binaryDictionaryInfo->getDictBufOffset();
110    const void *dictBuf = binaryDictionaryInfo->getDictBuf();
111    if (!dictBuf) return;
112    releaseDictBuf(static_cast<const char *>(dictBuf) - dictBufOffset,
113            binaryDictionaryInfo->getDictSize() + dictBufOffset,
114            binaryDictionaryInfo->getMmapFd());
115    delete dictionary;
116}
117
118static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
119        jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
120        jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
121        jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jintArray suggestOptions,
122        jintArray prevWordCodePointsForBigrams, jintArray outputCodePointsArray,
123        jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray) {
124    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
125    if (!dictionary) return 0;
126    ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
127    DicTraverseSession *traverseSession =
128            reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
129
130    // Input values
131    int xCoordinates[inputSize];
132    int yCoordinates[inputSize];
133    int times[inputSize];
134    int pointerIds[inputSize];
135    const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
136    int inputCodePoints[inputCodePointsLength];
137    const jsize prevWordCodePointsLength =
138            prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0;
139    int prevWordCodePointsInternal[prevWordCodePointsLength];
140    int *prevWordCodePoints = 0;
141    env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
142    env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
143    env->GetIntArrayRegion(timesArray, 0, inputSize, times);
144    env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
145    env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
146    if (prevWordCodePointsForBigrams) {
147        env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength,
148                prevWordCodePointsInternal);
149        prevWordCodePoints = prevWordCodePointsInternal;
150    }
151
152    const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
153    int options[numberOfOptions];
154    env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
155    SuggestOptions givenSuggestOptions(options, numberOfOptions);
156
157    // Output values
158    /* By the way, let's check the output array length here to make sure */
159    const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
160    if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
161        AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
162        ASSERT(false);
163        return 0;
164    }
165    const jsize scoresLength = env->GetArrayLength(scoresArray);
166    if (scoresLength != MAX_RESULTS) {
167        AKLOGE("Invalid scoresLength: %d", scoresLength);
168        ASSERT(false);
169        return 0;
170    }
171    int outputCodePoints[outputCodePointsLength];
172    int scores[scoresLength];
173    const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
174    int spaceIndices[spaceIndicesLength];
175    const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
176    int outputTypes[outputTypesLength];
177    memset(outputCodePoints, 0, sizeof(outputCodePoints));
178    memset(scores, 0, sizeof(scores));
179    memset(spaceIndices, 0, sizeof(spaceIndices));
180    memset(outputTypes, 0, sizeof(outputTypes));
181
182    int count;
183    if (givenSuggestOptions.isGesture() || inputSize > 0) {
184        count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
185                times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints,
186                prevWordCodePointsLength, commitPoint, &givenSuggestOptions, outputCodePoints,
187                scores, spaceIndices, outputTypes);
188    } else {
189        count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
190                outputCodePoints, scores, outputTypes);
191    }
192
193    // Copy back the output values
194    env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
195    env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
196    env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
197    env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
198
199    return count;
200}
201
202static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
203        jintArray word) {
204    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
205    if (!dictionary) return NOT_A_PROBABILITY;
206    const jsize wordLength = env->GetArrayLength(word);
207    int codePoints[wordLength];
208    env->GetIntArrayRegion(word, 0, wordLength, codePoints);
209    return dictionary->getProbability(codePoints, wordLength);
210}
211
212static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
213        jintArray word0, jintArray word1) {
214    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
215    if (!dictionary) return JNI_FALSE;
216    const jsize word0Length = env->GetArrayLength(word0);
217    const jsize word1Length = env->GetArrayLength(word1);
218    int word0CodePoints[word0Length];
219    int word1CodePoints[word1Length];
220    env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
221    env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
222    return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
223}
224
225static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
226        jintArray before, jintArray after, jint score) {
227    jsize beforeLength = env->GetArrayLength(before);
228    jsize afterLength = env->GetArrayLength(after);
229    int beforeCodePoints[beforeLength];
230    int afterCodePoints[afterLength];
231    env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
232    env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
233    return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength,
234            afterCodePoints, afterLength, score);
235}
236
237static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, jintArray before,
238        jintArray after) {
239    jsize beforeLength = env->GetArrayLength(before);
240    jsize afterLength = env->GetArrayLength(after);
241    int beforeCodePoints[beforeLength];
242    int afterCodePoints[afterLength];
243    env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
244    env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
245    return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength,
246            afterCodePoints, afterLength);
247}
248
249static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
250        jintArray word, jint probability) {
251    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
252    if (!dictionary) {
253        return;
254    }
255    jsize wordLength = env->GetArrayLength(word);
256    int codePoints[wordLength];
257    dictionary->addUnigramWord(codePoints, wordLength, probability);
258}
259
260static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
261        jintArray word0, jintArray word1, jint probability) {
262    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
263    if (!dictionary) {
264        return;
265    }
266    jsize word0Length = env->GetArrayLength(word0);
267    int word0CodePoints[word0Length];
268    jsize word1Length = env->GetArrayLength(word1);
269    int word1CodePoints[word1Length];
270    dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
271            word1Length, probability);
272}
273
274static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
275        jintArray word0, jintArray word1) {
276    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
277    if (!dictionary) {
278        return;
279    }
280    jsize word0Length = env->GetArrayLength(word0);
281    int word0CodePoints[word0Length];
282    jsize word1Length = env->GetArrayLength(word1);
283    int word1CodePoints[word1Length];
284    dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints,
285            word1Length);
286}
287
288static const JNINativeMethod sMethods[] = {
289    {
290        const_cast<char *>("openNative"),
291        const_cast<char *>("(Ljava/lang/String;JJZ)J"),
292        reinterpret_cast<void *>(latinime_BinaryDictionary_open)
293    },
294    {
295        const_cast<char *>("closeNative"),
296        const_cast<char *>("(J)V"),
297        reinterpret_cast<void *>(latinime_BinaryDictionary_close)
298    },
299    {
300        const_cast<char *>("getSuggestionsNative"),
301        const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"),
302        reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
303    },
304    {
305        const_cast<char *>("getProbabilityNative"),
306        const_cast<char *>("(J[I)I"),
307        reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
308    },
309    {
310        const_cast<char *>("isValidBigramNative"),
311        const_cast<char *>("(J[I[I)Z"),
312        reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
313    },
314    {
315        const_cast<char *>("calcNormalizedScoreNative"),
316        const_cast<char *>("([I[II)F"),
317        reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
318    },
319    {
320        const_cast<char *>("editDistanceNative"),
321        const_cast<char *>("([I[I)I"),
322        reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)
323    },
324    {
325        const_cast<char *>("addUnigramWordNative"),
326        const_cast<char *>("(J[II)V"),
327        reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
328    },
329    {
330        const_cast<char *>("addBigramWordsNative"),
331        const_cast<char *>("(J[I[II)V"),
332        reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
333    },
334    {
335        const_cast<char *>("removeBigramWordsNative"),
336        const_cast<char *>("(J[I[I)V"),
337        reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
338    }
339};
340
341int register_BinaryDictionary(JNIEnv *env) {
342    const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
343    return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
344}
345} // namespace latinime
346