jni_data_utils.h revision 16cc3992d7468ef781753df7b4227330e0834501
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_JNI_DATA_UTILS_H
18#define LATINIME_JNI_DATA_UTILS_H
19
20#include <vector>
21
22#include "defines.h"
23#include "jni.h"
24#include "suggest/core/session/ngram_context.h"
25#include "suggest/core/policy/dictionary_header_structure_policy.h"
26#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
27#include "utils/char_utils.h"
28
29namespace latinime {
30
31class JniDataUtils {
32 public:
33    static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
34        if (!array) {
35            outVector->clear();
36            return;
37        }
38        const jsize arrayLength = env->GetArrayLength(array);
39        outVector->resize(arrayLength);
40        env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
41    }
42
43    static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
44            jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
45        DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
46        const int keyCount = env->GetArrayLength(attributeKeyStringArray);
47        for (int i = 0; i < keyCount; i++) {
48            jstring keyString = static_cast<jstring>(
49                    env->GetObjectArrayElement(attributeKeyStringArray, i));
50            const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
51            char keyChars[keyUtf8Length + 1];
52            env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
53            env->DeleteLocalRef(keyString);
54            keyChars[keyUtf8Length] = '\0';
55            DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
56            HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
57
58            jstring valueString = static_cast<jstring>(
59                    env->GetObjectArrayElement(attributeValueStringArray, i));
60            const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
61            char valueChars[valueUtf8Length + 1];
62            env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
63            env->DeleteLocalRef(valueString);
64            valueChars[valueUtf8Length] = '\0';
65            DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
66            HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
67            attributeMap[key] = value;
68        }
69        return attributeMap;
70    }
71
72    static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
73            const int maxLength, const int *const codePoints, const int codePointCount,
74            const bool needsNullTermination) {
75        const int codePointBufSize = std::min(maxLength, codePointCount);
76        int outputCodePonts[codePointBufSize];
77        int outputCodePointCount = 0;
78        for (int i = 0; i < codePointBufSize; ++i) {
79            const int codePoint = codePoints[i];
80            int codePointToOutput = codePoint;
81            if (!CharUtils::isInUnicodeSpace(codePoint)) {
82                if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
83                    // Just skip Beginning-of-Sentence marker.
84                    continue;
85                }
86                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
87            } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
88                // Control code.
89                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
90            }
91            outputCodePonts[outputCodePointCount++] = codePointToOutput;
92        }
93        env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
94                outputCodePonts);
95        if (needsNullTermination && outputCodePointCount < maxLength) {
96            env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
97                    1 /* len */, &CODE_POINT_NULL);
98        }
99    }
100
101    static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
102            jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
103        int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
104        int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
105        bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
106        for (size_t i = 0; i < prevWordCount; ++i) {
107            prevWordCodePointCount[i] = 0;
108            isBeginningOfSentence[i] = false;
109            jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
110            if (!prevWord) {
111                continue;
112            }
113            jsize prevWordLength = env->GetArrayLength(prevWord);
114            if (prevWordLength > MAX_WORD_LENGTH) {
115                continue;
116            }
117            env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
118            env->DeleteLocalRef(prevWord);
119            prevWordCodePointCount[i] = prevWordLength;
120            jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
121            env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
122                    &isBeginningOfSentenceBoolean);
123            isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
124        }
125        return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
126                prevWordCount);
127    }
128
129    static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
130            const jboolean value) {
131        env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
132    }
133
134    static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
135        env->SetIntArrayRegion(array, index, 1 /* len */, &value);
136    }
137
138    static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
139            const float value) {
140        env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
141    }
142
143 private:
144    DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
145
146    static const int CODE_POINT_REPLACEMENT_CHARACTER;
147    static const int CODE_POINT_NULL;
148};
149} // namespace latinime
150#endif // LATINIME_JNI_DATA_UTILS_H
151